1 /* Copyright (c) 2000, 2021, Oracle and/or its affiliates.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
22 
23 /**
24   @file
25 
26   @brief Evaluate query expressions, throughout resolving, optimization and
27          execution.
28 
29   @defgroup Query_Optimizer  Query Optimizer
30   @{
31 */
32 
33 #include "sql_select.h"
34 #include "sql_table.h"                          // primary_key_name
35 #include "sql_derived.h"
36 #include "probes_mysql.h"
37 #include "opt_trace.h"
38 #include "key.h"                 // key_copy, key_cmp, key_cmp_if_same
39 #include "lock.h"                // mysql_unlock_some_tables,
40                                  // mysql_unlock_read_tables
41 #include "sql_show.h"            // append_identifier
42 #include "sql_base.h"
43 #include "auth_common.h"         // *_ACL
44 #include "sql_test.h"            // misc. debug printing utilities
45 #include "records.h"             // init_read_record, end_read_record
46 #include "filesort.h"            // filesort_free_buffers
47 #include "opt_explain.h"
48 #include "sql_join_buffer.h"     // JOIN_CACHE
49 #include "sql_optimizer.h"       // JOIN
50 #include "sql_tmp_table.h"       // tmp tables
51 #include "debug_sync.h"          // DEBUG_SYNC
52 #include "item_sum.h"            // Item_sum
53 #include "sql_planner.h"         // calculate_condition_filter
54 #include "opt_hints.h"           // hint_key_state()
55 
56 #include <algorithm>
57 
58 using std::max;
59 using std::min;
60 
61 const char store_key_const_item::static_name[]= "const";
62 
63 static store_key *get_store_key(THD *thd,
64 				Key_use *keyuse, table_map used_tables,
65 				KEY_PART_INFO *key_part, uchar *key_buff,
66 				uint maybe_null);
67 bool const_expression_in_where(Item *conds,Item *item, Item **comp_item);
68 uint find_shortest_key(TABLE *table, const key_map *usable_keys);
69 /**
70   Handle a data manipulation query, from preparation through cleanup
71 
72   @param thd       thread handler
73   @param lex       query to be processed
74   @param result    sink of result of query execution.
75                    may be protocol object (for passing result to a client),
76                    insert object, update object, delete object, etc.
77   @param added_options additional options for detailed control over execution
78   @param removed_options options that are not applicable for this command
79 
80   @returns false if success, true if error
81 
82   @details
83     Processing a query goes through 5 phases (parsing is already done)
84      - Preparation
85      - Locking of tables
86      - Optimization
87      - Execution or explain
88      - Cleanup
89     The queries handled by this function are:
90 
91     SELECT
92     INSERT ... SELECT
93     REPLACE ... SELECT
94     UPDATE (multi-table)
95     DELETE (multi-table)
96 
97     @todo make this function also handle INSERT ... VALUES, single-table
98           UPDATE and DELETE, SET and DO.
99 
100     The function processes simple query expressions without UNION and
101     without multi-level ORDER BY/LIMIT separately.
102     Such queries are executed with a more direct code path.
103 */
bool handle_query(THD *thd, LEX *lex, Query_result *result,
                  ulonglong added_options, ulonglong removed_options)
{
  DBUG_ENTER("handle_query");

  SELECT_LEX_UNIT *const unit= lex->unit;
  SELECT_LEX *const select= unit->first_select();
  bool res;

  assert(thd == unit->thd);

  // The unit must be fresh: this function drives it through all phases itself.
  assert(!unit->is_prepared() && !unit->is_optimized() &&
         !unit->is_executed());

  // PROCEDURE ANALYSE is only meaningful for plain SELECT statements.
  if (lex->proc_analyse && lex->sql_command != SQLCOM_SELECT)
  {
    my_error(ER_WRONG_USAGE, MYF(0), "PROCEDURE", "non-SELECT");
    DBUG_RETURN(true);
  }

  /*
    A "simple" unit has a single query block (no UNION, no multi-level
    ORDER BY/LIMIT) and is processed through the direct code path below,
    operating on the query block itself instead of on the unit.
  */
  const bool single_query= unit->is_simple();

  lex->used_tables=0;                         // Updated by setup_fields

  THD_STAGE_INFO(thd, stage_init);

  // Phase 1: Preparation (resolving).
  if (single_query)
  {
    unit->set_limit(unit->global_parameters());

    select->context.resolve_in_select_list= true;
    select->set_query_result(result);
    select->make_active_options(added_options, removed_options);
    select->fields_list= select->item_list;

    if (select->prepare(thd))
      goto err;

    unit->set_prepared();
  }
  else
  {
    if (unit->prepare(thd, result, SELECT_NO_UNLOCK | added_options,
                      removed_options))
      goto err;
  }

  // Phase 2: Locking of tables.
  assert(!lex->is_query_tables_locked());
  /*
    Locking of tables is done after preparation but before optimization.
    This allows to do better partition pruning and avoid locking unused
    partitions. As a consequence, in such a case, prepare stage can rely only
    on metadata about tables used and not data from them.
  */
  if (lock_tables(thd, lex->query_tables, lex->table_count, 0))
    goto err;

  /*
    Register query result in cache.
    Tables must be locked before storing the query in the query cache.
    Transactional engines must be signalled that the statement has started,
    by calling external_lock().
  */
  query_cache.store_query(thd, lex->query_tables);

  // Phase 3: Optimization.
  if (single_query)
  {
    if (select->optimize(thd))
      goto err;

    unit->set_optimized();
  }
  else
  {
    if (unit->optimize(thd))
      goto err;
  }

  // Phase 4: Either explain the plan or execute the query.
  if (lex->is_explain())
  {
    if (explain_query(thd, unit))
      goto err;     /* purecov: inspected */
  }
  else
  {
    if (single_query)
    {
      select->join->exec();
      unit->set_executed();
      if (thd->is_error())
        goto err;
    }
    else
    {
      if (unit->execute(thd))
        goto err;
    }
  }

  assert(!thd->is_error());

  thd->update_previous_found_rows();
  THD_STAGE_INFO(thd, stage_end);

  // Phase 5: Do partial cleanup (preserve plans for EXPLAIN).
  res= unit->cleanup(false);

  DBUG_RETURN(res);

err:
  // Error path: an error (or kill) must already have been reported.
  assert(thd->is_error() || thd->killed);
  DBUG_PRINT("info",("report_error: %d", thd->is_error()));
  THD_STAGE_INFO(thd, stage_end);

  (void) unit->cleanup(false);

  // Abort the result set (if it has been prepared).
  result->abort_result_set();

  DBUG_RETURN(thd->is_error());
}
225 
226 
227 /*****************************************************************************
228   Check fields, find best join, do the select and output fields.
229   All tables must be opened.
230 *****************************************************************************/
231 
232 /**
233   @brief Check if two items are compatible wrt. materialization.
234 
235   @param outer Expression from outer query
236   @param inner Expression from inner query
237 
238   @retval TRUE   If subquery types allow materialization.
239   @retval FALSE  Otherwise.
240 */
241 
types_allow_materialization(Item * outer,Item * inner)242 bool types_allow_materialization(Item *outer, Item *inner)
243 
244 {
245   if (outer->result_type() != inner->result_type())
246     return false;
247   switch (outer->result_type()) {
248   case ROW_RESULT:
249     // Materialization of rows nested inside rows is not currently supported.
250     return false;
251   case STRING_RESULT:
252     if (outer->is_temporal_with_date() != inner->is_temporal_with_date())
253       return false;
254     if (!(outer->collation.collation == inner->collation.collation
255         /*&& outer->max_length <= inner->max_length */))
256       return false;
257   /*case INT_RESULT:
258     if (!(outer->unsigned_flag ^ inner->unsigned_flag))
259       return false; */
260   default:
261     ;                 /* suitable for materialization */
262   }
263   return true;
264 }
265 
266 
267 /*
268   Check if the table's rowid is included in the temptable
269 
270   SYNOPSIS
271     sj_table_is_included()
272       join      The join
273       join_tab  The table to be checked
274 
275   DESCRIPTION
276     SemiJoinDuplicateElimination: check the table's rowid should be included
277     in the temptable. This is so if
278 
279     1. The table is not embedded within some semi-join nest
280     2. The has been pulled out of a semi-join nest, or
281 
282     3. The table is functionally dependent on some previous table
283 
284     [4. This is also true for constant tables that can't be
285         NULL-complemented but this function is not called for such tables]
286 
287   RETURN
288     TRUE  - Include table's rowid
289     FALSE - Don't
290 */
291 
sj_table_is_included(JOIN * join,JOIN_TAB * join_tab)292 static bool sj_table_is_included(JOIN *join, JOIN_TAB *join_tab)
293 {
294   if (join_tab->emb_sj_nest)
295     return FALSE;
296 
297   /* Check if this table is functionally dependent on the tables that
298      are within the same outer join nest
299   */
300   TABLE_LIST *embedding= join_tab->table_ref->embedding;
301   if (join_tab->type() == JT_EQ_REF)
302   {
303     table_map depends_on= 0;
304     uint idx;
305 
306     for (uint kp= 0; kp < join_tab->ref().key_parts; kp++)
307       depends_on |= join_tab->ref().items[kp]->used_tables();
308 
309     Table_map_iterator it(depends_on & ~PSEUDO_TABLE_BITS);
310     while ((idx= it.next_bit())!=Table_map_iterator::BITMAP_END)
311     {
312       JOIN_TAB *ref_tab= join->map2table[idx];
313       if (embedding != ref_tab->table_ref->embedding)
314         return TRUE;
315     }
316     /* Ok, functionally dependent */
317     return FALSE;
318   }
319   /* Not functionally dependent => need to include*/
320   return TRUE;
321 }
322 
323 
324 /**
325   Setup the strategies to eliminate semi-join duplicates.
326 
327   @param join           Join to process
328   @param no_jbuf_after  Do not use join buffering after the table with this
329                         number
330 
331   @retval FALSE  OK
332   @retval TRUE   Out of memory error
333 
334   @details
335     Setup the strategies to eliminate semi-join duplicates.
336     At the moment there are 5 strategies:
337 
338     1. DuplicateWeedout (use of temptable to remove duplicates based on rowids
339                          of row combinations)
340     2. FirstMatch (pick only the 1st matching row combination of inner tables)
341     3. LooseScan (scanning the sj-inner table in a way that groups duplicates
342                   together and picking the 1st one)
343     4. MaterializeLookup (Materialize inner tables, then setup a scan over
344                           outer correlated tables, lookup in materialized table)
345     5. MaterializeScan (Materialize inner tables, then setup a scan over
346                         materialized tables, perform lookup in outer tables)
347 
348     The join order has "duplicate-generating ranges", and every range is
349     served by one strategy or a combination of FirstMatch with with some
350     other strategy.
351 
352     "Duplicate-generating range" is defined as a range within the join order
353     that contains all of the inner tables of a semi-join. All ranges must be
354     disjoint, if tables of several semi-joins are interleaved, then the ranges
355     are joined together, which is equivalent to converting
356       SELECT ... WHERE oe1 IN (SELECT ie1 ...) AND oe2 IN (SELECT ie2 )
357     to
358       SELECT ... WHERE (oe1, oe2) IN (SELECT ie1, ie2 ... ...)
359     .
360 
361     Applicability conditions are as follows:
362 
363     DuplicateWeedout strategy
364     ~~~~~~~~~~~~~~~~~~~~~~~~~
365 
366       (ot|nt)*  [ it ((it|ot|nt)* (it|ot))]  (nt)*
367       +------+  +=========================+  +---+
368         (1)                 (2)               (3)
369 
370        (1) - Prefix of OuterTables (those that participate in
371              IN-equality and/or are correlated with subquery) and outer
372              Non-correlated tables.
373        (2) - The handled range. The range starts with the first sj-inner
374              table, and covers all sj-inner and outer tables
375              Within the range,  Inner, Outer, outer non-correlated tables
376              may follow in any order.
377        (3) - The suffix of outer non-correlated tables.
378 
379     FirstMatch strategy
380     ~~~~~~~~~~~~~~~~~~~
381 
382       (ot|nt)*  [ it ((it|nt)* it) ]  (nt)*
383       +------+  +==================+  +---+
384         (1)             (2)          (3)
385 
386       (1) - Prefix of outer correlated and non-correlated tables
387       (2) - The handled range, which may contain only inner and
388             non-correlated tables.
389       (3) - The suffix of outer non-correlated tables.
390 
391     LooseScan strategy
392     ~~~~~~~~~~~~~~~~~~
393 
394      (ot|ct|nt) [ loosescan_tbl (ot|nt|it)* it ]  (ot|nt)*
395      +--------+   +===========+ +=============+   +------+
396         (1)           (2)          (3)              (4)
397 
398       (1) - Prefix that may contain any outer tables. The prefix must contain
399             all the non-trivially correlated outer tables. (non-trivially means
400             that the correlation is not just through the IN-equality).
401 
402       (2) - Inner table for which the LooseScan scan is performed.
403             Notice that special requirements for existence of certain indexes
404             apply to this table, @see class Loose_scan_opt.
405 
406       (3) - The remainder of the duplicate-generating range. It is served by
407             application of FirstMatch strategy. Outer IN-correlated tables
408             must be correlated to the LooseScan table but not to the inner
409             tables in this range. (Currently, there can be no outer tables
410             in this range because of implementation restrictions,
411             @see Optimize_table_order::advance_sj_state()).
412 
413       (4) - The suffix of outer correlated and non-correlated tables.
414 
415     MaterializeLookup strategy
416     ~~~~~~~~~~~~~~~~~~~~~~~~~~
417 
418      (ot|nt)*  [ it (it)* ]  (nt)*
419      +------+  +==========+  +---+
420         (1)         (2)        (3)
421 
422       (1) - Prefix of outer correlated and non-correlated tables.
423 
424       (2) - The handled range, which may contain only inner tables.
425             The inner tables are materialized in a temporary table that is
426             later used as a lookup structure for the outer correlated tables.
427 
428       (3) - The suffix of outer non-correlated tables.
429 
430     MaterializeScan strategy
431     ~~~~~~~~~~~~~~~~~~~~~~~~~~
432 
433      (ot|nt)*  [ it (it)* ]  (ot|nt)*
434      +------+  +==========+  +-----+
435         (1)         (2)         (3)
436 
437       (1) - Prefix of outer correlated and non-correlated tables.
438 
439       (2) - The handled range, which may contain only inner tables.
440             The inner tables are materialized in a temporary table which is
441             later used to setup a scan.
442 
443       (3) - The suffix of outer correlated and non-correlated tables.
444 
445   Note that MaterializeLookup and MaterializeScan has overlap in their patterns.
446   It may be possible to consolidate the materialization strategies into one.
447 
448   The choice between the strategies is made by the join optimizer (see
449   advance_sj_state() and fix_semijoin_strategies()).
450   This function sets up all fields/structures/etc needed for execution except
451   for setup/initialization of semi-join materialization which is done in
452   setup_materialized_table().
453 */
454 
static bool setup_semijoin_dups_elimination(JOIN *join, uint no_jbuf_after)
{
  uint tableno;
  THD *thd= join->thd;
  DBUG_ENTER("setup_semijoin_dups_elimination");
  ASSERT_BEST_REF_IN_JOIN_ORDER(join);

  // Nothing to do for queries without semi-join nests.
  if (join->select_lex->sj_nests.is_empty())
    DBUG_RETURN(FALSE);

  QEP_TAB *const qep_array= join->qep_tab;
  /*
    Walk the non-constant primary tables in join order; each iteration
    handles one duplicate-generating range and advances tableno past it.
  */
  for (tableno= join->const_tables; tableno < join->primary_tables; )
  {
#ifndef NDEBUG
    const bool tab_in_sj_nest= join->best_ref[tableno]->emb_sj_nest != NULL;
#endif
    QEP_TAB *const tab= &qep_array[tableno];
    POSITION *const pos= tab->position();

    if (pos->sj_strategy == SJ_OPT_NONE)
    {
      tableno++;  // nothing to do
      continue;
    }
    // Last table of the range covered by the chosen strategy.
    QEP_TAB *last_sj_tab= tab + pos->n_sj_tables - 1;
    switch (pos->sj_strategy) {
      case SJ_OPT_MATERIALIZE_LOOKUP:
      case SJ_OPT_MATERIALIZE_SCAN:
        assert(false); // Should not occur among "primary" tables
        // Do nothing
        tableno+= pos->n_sj_tables;
        break;
      case SJ_OPT_LOOSE_SCAN:
      {
        assert(tab_in_sj_nest); // First table must be inner
        /* We jump from the last table to the first one */
        tab->match_tab= last_sj_tab->idx();

        /* For LooseScan, duplicate elimination is based on rows being sorted
           on key. We need to make sure that range select keeps the sorted index
           order. (When using MRR it may not.)

           Note: need_sorted_output() implementations for range select classes
           that do not support sorted output, will trigger an assert. This
           should not happen since LooseScan strategy is only picked if sorted
           output is supported.
        */
        if (tab->quick())
        {
          assert(tab->quick()->index == pos->loosescan_key);
          tab->quick()->need_sorted_output();
        }

        const uint keyno= pos->loosescan_key;
        assert(tab->keys().is_set(keyno));
        tab->set_index(keyno);

        /* Calculate key length */
        uint keylen= 0;
        for (uint kp= 0; kp < pos->loosescan_parts; kp++)
          keylen+= tab->table()->key_info[keyno].key_part[kp].store_length;
        tab->loosescan_key_len= keylen;

        // Remaining inner tables are handled with FirstMatch-style jumps.
        if (pos->n_sj_tables > 1)
        {
          last_sj_tab->firstmatch_return= tab->idx();
          last_sj_tab->match_tab= last_sj_tab->idx();
        }
        tableno+= pos->n_sj_tables;
        break;
      }
      case SJ_OPT_DUPS_WEEDOUT:
      {
        assert(tab_in_sj_nest); // First table must be inner
        /*
          Consider a semijoin of one outer and one inner table, both
          with two rows. The inner table is assumed to be confluent
          (See sj_opt_materialize_lookup)

          If normal nested loop execution is used, we do not need to
          include semi-join outer table rowids in the duplicate
          weedout temp table since NL guarantees that outer table rows
          are encountered only consecutively and because all rows in
          the temp table are deleted for every new outer table
          combination (example is with a confluent inner table):

            ot1.row1|it1.row1
                 '-> temp table's have_confluent_row == FALSE
                   |-> output ot1.row1
                   '-> set have_confluent_row= TRUE
            ot1.row1|it1.row2
                 |-> temp table's have_confluent_row == TRUE
                 | '-> do not output ot1.row1
                 '-> no more join matches - set have_confluent_row= FALSE
            ot1.row2|it1.row1
                 '-> temp table's have_confluent_row == FALSE
                   |-> output ot1.row2
                   '-> set have_confluent_row= TRUE
              ...

          Note: not having outer table rowids in the temp table and
          then emptying the temp table when a new outer table row
          combinition is encountered is an optimization. Including
          outer table rowids in the temp table is not harmful but
          wastes memory.

          Now consider the join buffering algorithms (BNL/BKA). These
          algorithms join each inner row with outer rows in "reverse"
          order compared to NL. Effectively, this means that outer
          table rows may be encountered multiple times in a
          non-consecutive manner:

            NL:                 BNL/BKA:
            ot1.row1|it1.row1   ot1.row1|it1.row1
            ot1.row1|it1.row2   ot1.row2|it1.row1
            ot1.row2|it1.row1   ot1.row1|it1.row2
            ot1.row2|it1.row2   ot1.row2|it1.row2

          It is clear from the above that there is no place we can
          empty the temp table like we do in NL to avoid storing outer
          table rowids.

          Below we check if join buffering might be used. If so, set
          first_table to the first non-constant table so that outer
          table rowids are included in the temp table. Do not destroy
          other duplicate elimination methods.
        */
        uint first_table= tableno;
        for (uint sj_tableno= tableno;
             sj_tableno < tableno + pos->n_sj_tables;
             sj_tableno++)
        {
          if (join->best_ref[sj_tableno]->use_join_cache() &&
              sj_tableno <= no_jbuf_after)
          {
            /* Join buffering will probably be used */
            first_table= join->const_tables;
            break;
          }
        }

        QEP_TAB *const first_sj_tab= qep_array + first_table;
        if (last_sj_tab->first_inner() != NO_PLAN_IDX &&
            first_sj_tab->first_inner() != last_sj_tab->first_inner())
        {
          /*
            The first duplicate weedout table is an outer table of an outer join
            and the last duplicate weedout table is one of the inner tables of
            the outer join.
            In this case, we must assure that all the inner tables of the
            outer join are part of the duplicate weedout operation.
            This is to assure that NULL-extension for inner tables of an
            outer join is performed before duplicate elimination is performed,
            otherwise we will have extra NULL-extended rows being output, which
            should have been eliminated as duplicates.
          */
          QEP_TAB *tab2= &qep_array[last_sj_tab->first_inner()];
          /*
            First, locate the table that is the first inner table of the
            outer join operation that first_sj_tab is outer for.
          */
          while (tab2->first_upper() != NO_PLAN_IDX &&
                 tab2->first_upper() != first_sj_tab->first_inner())
            tab2= qep_array + tab2->first_upper();
          // Then, extend the range with all inner tables of the join nest:
          if (qep_array[tab2->first_inner()].last_inner() > last_sj_tab->idx())
            last_sj_tab= &qep_array[qep_array[tab2->first_inner()].last_inner()];
        }

        SJ_TMP_TABLE::TAB sjtabs[MAX_TABLES];
        SJ_TMP_TABLE::TAB *last_tab= sjtabs;
        uint jt_rowid_offset= 0; // # tuple bytes are already occupied (w/o NULL bytes)
        uint jt_null_bits= 0;    // # null bits in tuple bytes
        /*
          Walk through the range and remember
           - tables that need their rowids to be put into temptable
           - the last outer table
        */
        for (QEP_TAB *tab_in_range= qep_array + first_table;
             tab_in_range <= last_sj_tab;
             tab_in_range++)
        {
          if (sj_table_is_included(join, join->best_ref[tab_in_range->idx()]))
          {
            // Record where this table's rowid lives in the weedout tuple.
            last_tab->qep_tab= tab_in_range;
            last_tab->rowid_offset= jt_rowid_offset;
            jt_rowid_offset += tab_in_range->table()->file->ref_length;
            if (tab_in_range->table()->is_nullable())
            {
              last_tab->null_byte= jt_null_bits / 8;
              last_tab->null_bit= jt_null_bits++;
            }
            last_tab++;
            tab_in_range->table()->prepare_for_position();
            tab_in_range->keep_current_rowid= true;
          }
        }

        SJ_TMP_TABLE *sjtbl;
        if (jt_rowid_offset) /* Temptable has at least one rowid */
        {
          size_t tabs_size= (last_tab - sjtabs) * sizeof(SJ_TMP_TABLE::TAB);
          if (!(sjtbl= new (thd->mem_root) SJ_TMP_TABLE) ||
              !(sjtbl->tabs= (SJ_TMP_TABLE::TAB*) thd->alloc(tabs_size)))
            DBUG_RETURN(TRUE); /* purecov: inspected */
          memcpy(sjtbl->tabs, sjtabs, tabs_size);
          sjtbl->is_confluent= FALSE;
          sjtbl->tabs_end= sjtbl->tabs + (last_tab - sjtabs);
          sjtbl->rowid_len= jt_rowid_offset;
          sjtbl->null_bits= jt_null_bits;
          sjtbl->null_bytes= (jt_null_bits + 7)/8;
          sjtbl->tmp_table=
            create_duplicate_weedout_tmp_table(thd,
                                               sjtbl->rowid_len +
                                               sjtbl->null_bytes,
                                               sjtbl);
          if (sjtbl->tmp_table->hash_field)
            sjtbl->tmp_table->file->ha_index_init(0, 0);
          join->sj_tmp_tables.push_back(sjtbl->tmp_table);
        }
        else
        {
          /*
            This is confluent case where the entire subquery predicate does
            not depend on anything at all, ie this is
              WHERE const IN (uncorrelated select)
          */
          if (!(sjtbl= new (thd->mem_root) SJ_TMP_TABLE))
            DBUG_RETURN(TRUE); /* purecov: inspected */
          sjtbl->tmp_table= NULL;
          sjtbl->is_confluent= TRUE;
          sjtbl->have_confluent_row= FALSE;
        }
        // Flush the weedout table at range start, check duplicates at its end.
        qep_array[first_table].flush_weedout_table= sjtbl;
        last_sj_tab->check_weed_out_table= sjtbl;

        tableno+= pos->n_sj_tables;
        break;
      }
      case SJ_OPT_FIRST_MATCH:
      {
        /*
          Setup a "jump" from the last table in the range of inner tables
          to the last outer table before the inner tables.
          If there are outer tables inbetween the inner tables, we have to
          setup a "split jump": Jump from the last inner table to the last
          outer table within the range, then from the last inner table
          before the outer table(s), jump to the last outer table before
          this range of inner tables, etc.
        */
        plan_idx jump_to= tab->idx() - 1;
        assert(tab_in_sj_nest); // First table must be inner
        for (QEP_TAB *tab_in_range= tab;
             tab_in_range <= last_sj_tab;
             tab_in_range++)
        {
          if (!join->best_ref[tab_in_range->idx()]->emb_sj_nest)
          {
            /*
              Let last non-correlated table be jump target for
              subsequent inner tables.
            */
            jump_to= tab_in_range->idx();
          }
          else
          {
            /*
              Assign jump target for last table in a consecutive range of
              inner tables.
            */
            if (tab_in_range == last_sj_tab ||
                !join->best_ref[tab_in_range->idx() + 1]->emb_sj_nest)
            {
              tab_in_range->firstmatch_return= jump_to;
              tab_in_range->match_tab= last_sj_tab->idx();
            }
          }
        }
        tableno+= pos->n_sj_tables;
        break;
      }
    }
  }
  DBUG_RETURN(FALSE);
}
740 
741 
742 /*
743   Destroy all temporary tables created by NL-semijoin runtime
744 */
745 
destroy_sj_tmp_tables(JOIN * join)746 static void destroy_sj_tmp_tables(JOIN *join)
747 {
748   List_iterator<TABLE> it(join->sj_tmp_tables);
749   TABLE *table;
750   while ((table= it++))
751   {
752     /*
753       SJ-Materialization tables are initialized for either sequential reading
754       or index lookup, DuplicateWeedout tables are not initialized for read
755       (we only write to them), so need to call ha_index_or_rnd_end.
756     */
757     table->file->ha_index_or_rnd_end();
758     free_tmp_table(join->thd, table);
759   }
760   join->sj_tmp_tables.empty();
761 }
762 
763 
764 /**
765   Remove all rows from all temp tables used by NL-semijoin runtime
766 
767   @param join  The join to remove tables for
768 
769   All rows must be removed from all temporary tables before every join
770   re-execution.
771 */
772 
clear_sj_tmp_tables(JOIN * join)773 static int clear_sj_tmp_tables(JOIN *join)
774 {
775   int res;
776   List_iterator<TABLE> it(join->sj_tmp_tables);
777   TABLE *table;
778   while ((table= it++))
779   {
780     if ((res= table->file->ha_delete_all_rows()))
781       return res; /* purecov: inspected */
782   }
783   if (join->qep_tab)
784   {
785     Semijoin_mat_exec *sjm;
786     List_iterator<Semijoin_mat_exec> it2(join->sjm_exec_list);
787     while ((sjm= it2++))
788     {
789       {
790         QEP_TAB *const tab= &join->qep_tab[sjm->mat_table_index];
791         /*
792           If zero_result_cause is set, we have not gone through
793           pick_table_access_method() which sets materialize_table, so the
794           assertion is disabled in this case.
795         */
796         assert(join->zero_result_cause || tab->materialize_table);
797         tab->materialized= false;
798         // The materialized table must be re-read on next evaluation:
799         tab->table()->status= STATUS_GARBAGE | STATUS_NOT_FOUND;
800       }
801     }
802   }
803   return 0;
804 }
805 
806 
807 /**
808   Reset the state of this join object so that it is ready for a
809   new execution.
810 */
811 
void JOIN::reset()
{
  DBUG_ENTER("JOIN::reset");

  // Nothing to reset if the join was never executed.
  if (!executed)
    DBUG_VOID_RETURN;

  // Re-evaluate OFFSET; it may be a parameter that changed between executions.
  unit->offset_limit_cnt= (ha_rows)(select_lex->offset_limit ?
                                    select_lex->offset_limit->val_uint() :
                                    0ULL);

  first_record= false;
  group_sent= false;
  reset_executed();

  // Empty all temporary tables used by this join so they can be refilled.
  if (tmp_tables)
  {
    for (uint tmp= primary_tables; tmp < primary_tables + tmp_tables; tmp++)
    {
      TABLE *const tmp_table= qep_tab[tmp].table();
      if (!tmp_table->is_created())
        continue;
      tmp_table->file->extra(HA_EXTRA_RESET_STATE);
      tmp_table->file->ha_delete_all_rows();
      free_io_cache(tmp_table);
      filesort_free_buffers(tmp_table,0);
    }
  }
  clear_sj_tmp_tables(this);
  // Restore the base ref item array if a later slice was left active.
  if (current_ref_ptrs != items0)
  {
    set_items_ref_array(items0);
    set_group_rpa= false;
  }

  /* need to reset ref access state (see join_read_key) */
  if (qep_tab)
  {
    for (uint i= 0; i < tables; i++)
    {
      QEP_TAB *const tab= &qep_tab[i];
      /*
        If qep_tab==NULL, we may still have done ref access (to read a const
        table); const tables will not be re-read in the next execution of this
        subquery, so resetting key_err is not needed.
      */
      tab->ref().key_err= TRUE;
      /*
        If the finished execution used "filesort", it may have reset "quick"
        or "condition" when it didn't need them anymore. Restore them for the
        new execution (the new filesort will need them when it starts).
      */
      tab->restore_quick_optim_and_condition();
    }
  }

  /* Reset of sum functions */
  if (sum_funcs)
  {
    Item_sum *func, **func_ptr= sum_funcs;
    while ((func= *(func_ptr++)))
      func->clear();
  }

  // Re-initialize full-text functions for the new execution.
  if (select_lex->has_ft_funcs())
  {
    /* TODO: move the code to JOIN::exec */
    (void)init_ftfuncs(thd, select_lex);
  }

  DBUG_VOID_RETURN;
}
884 
885 
886 /**
887   Prepare join result.
888 
889   @details Prepare join result prior to join execution or describing.
890   Instantiate derived tables and get schema tables result if necessary.
891 
892   @return
893     TRUE  An error during derived or schema tables instantiation.
894     FALSE Ok
895 */
896 
prepare_result()897 bool JOIN::prepare_result()
898 {
899   DBUG_ENTER("JOIN::prepare_result");
900 
901   error= 0;
902   // Create result tables for materialized views/derived tables
903   if (select_lex->materialized_derived_table_count && !zero_result_cause)
904   {
905     for (TABLE_LIST *tl= select_lex->leaf_tables; tl; tl= tl->next_leaf)
906     {
907       if (tl->is_view_or_derived() && tl->create_derived(thd))
908         goto err;                 /* purecov: inspected */
909     }
910   }
911 
912   if (select_lex->query_result()->prepare2())
913     goto err;
914 
915   if ((select_lex->active_options() & OPTION_SCHEMA_TABLE) &&
916       get_schema_tables_result(this, PROCESSED_BY_JOIN_EXEC))
917     goto err;
918 
919   DBUG_RETURN(false);
920 
921 err:
922   error= 1;
923   DBUG_RETURN(true);
924 }
925 
926 
927 /**
928   Clean up and destroy join object.
929 
930   @return false if previous execution was successful, and true otherwise
931 */
932 
bool JOIN::destroy()
{
  DBUG_ENTER("JOIN::destroy");

  cond_equal= 0;

  set_plan_state(NO_PLAN);

  if (qep_tab)
  {
    // Once qep_tab exists, join_tab must already have been discarded.
    assert(!join_tab);
    for (uint i= 0; i < tables; i++)
      qep_tab[i].cleanup();
  }
  if (join_tab || best_ref)
  {
    // Execution structures were never created: clean up via whichever
    // optimization-phase array (join_tab or best_ref) is still in use.
    for (uint i= 0; i < tables; i++)
    {
      JOIN_TAB *const tab= join_tab ? &join_tab[i] : best_ref[i];
      tab->cleanup();
    }
  }

  /*
    We are not using tables anymore
    Unlock all tables. We may be in an INSERT .... SELECT statement.
  */

  // Run Cached_item DTORs!
  group_fields.delete_elements();

  /*
    We can't call delete_elements() on copy_funcs as this will cause
    problems in free_elements() as some of the elements are then deleted.
  */
  tmp_table_param.copy_funcs.empty();
  tmp_table_param.cleanup();
 /* Cleanup items referencing temporary table columns */
  cleanup_item_list(tmp_all_fields1);
  cleanup_item_list(tmp_all_fields3);
  destroy_sj_tmp_tables(this);

  // Delete all semijoin materialization descriptors and empty their list.
  List_iterator<Semijoin_mat_exec> sjm_list_it(sjm_exec_list);
  Semijoin_mat_exec *sjm;
  while ((sjm= sjm_list_it++))
    delete sjm;
  sjm_exec_list.empty();

  keyuse_array.clear();
  // Propagate the status of the previous execution (see function contract).
  DBUG_RETURN(error);
}
984 
985 
cleanup_item_list(List<Item> & items) const986 void JOIN::cleanup_item_list(List<Item> &items) const
987 {
988   if (!items.is_empty())
989   {
990     List_iterator_fast<Item> it(items);
991     Item *item;
992     while ((item= it++))
993       item->cleanup();
994   }
995 }
996 
997 
998 /**
999   Optimize a query block and all inner query expressions
1000 
1001   @param thd    thread handler
1002   @returns false if success, true if error
1003 */
1004 
optimize(THD * thd)1005 bool SELECT_LEX::optimize(THD *thd)
1006 {
1007   DBUG_ENTER("SELECT_LEX::optimize");
1008 
1009   assert(join == NULL);
1010   JOIN *const join_local= new JOIN(thd, this);
1011   if (!join_local)
1012     DBUG_RETURN(true);  /* purecov: inspected */
1013 
1014   set_join(join_local);
1015 
1016   if (join->optimize())
1017     DBUG_RETURN(true);
1018 
1019   for (SELECT_LEX_UNIT *unit= first_inner_unit(); unit; unit= unit->next_unit())
1020   {
1021     // Derived tables and const subqueries are already optimized
1022     if (!unit->is_optimized() && unit->optimize(thd))
1023       DBUG_RETURN(true);
1024   }
1025 
1026   DBUG_RETURN(false);
1027 }
1028 
1029 /*****************************************************************************
1030   Go through all combinations of not marked tables and find the one
1031   which uses least records
1032 *****************************************************************************/
1033 
1034 /**
1035   Find how much space the prevous read not const tables takes in cache.
1036 */
1037 
calc_used_field_length(THD * thd,TABLE * table,bool keep_current_rowid,uint * p_used_fields,uint * p_used_fieldlength,uint * p_used_blobs,bool * p_used_null_fields,bool * p_used_uneven_bit_fields)1038 void calc_used_field_length(THD *thd,
1039                             TABLE *table,
1040                             bool keep_current_rowid,
1041                             uint *p_used_fields,
1042                             uint *p_used_fieldlength,
1043                             uint *p_used_blobs,
1044                             bool *p_used_null_fields,
1045                             bool *p_used_uneven_bit_fields)
1046 {
1047   uint null_fields,blobs,fields,rec_length;
1048   Field **f_ptr,*field;
1049   uint uneven_bit_fields;
1050   MY_BITMAP *read_set= table->read_set;
1051 
1052   uneven_bit_fields= null_fields= blobs= fields= rec_length= 0;
1053   for (f_ptr= table->field ; (field= *f_ptr) ; f_ptr++)
1054   {
1055     if (bitmap_is_set(read_set, field->field_index))
1056     {
1057       uint flags= field->flags;
1058       fields++;
1059       rec_length+= field->pack_length();
1060       if (flags & BLOB_FLAG)
1061 	blobs++;
1062       if (!(flags & NOT_NULL_FLAG))
1063         null_fields++;
1064       if (field->type() == MYSQL_TYPE_BIT &&
1065           ((Field_bit*)field)->bit_len)
1066         uneven_bit_fields++;
1067     }
1068   }
1069   if (null_fields || uneven_bit_fields)
1070     rec_length+= (table->s->null_fields + 7) / 8;
1071   if (table->is_nullable())
1072     rec_length+= sizeof(my_bool);
1073   if (blobs)
1074   {
1075     uint blob_length=(uint) (table->file->stats.mean_rec_length-
1076 			     (table->s->reclength - rec_length));
1077     rec_length+= max<uint>(4U, blob_length);
1078   }
1079 
1080   if (keep_current_rowid)
1081   {
1082     rec_length+= table->file->ref_length;
1083     fields++;
1084   }
1085 
1086   *p_used_fields= fields;
1087   *p_used_fieldlength= rec_length;
1088   *p_used_blobs= blobs;
1089   *p_used_null_fields= null_fields;
1090   *p_used_uneven_bit_fields= uneven_bit_fields;
1091 }
1092 
1093 
init_ref_access()1094 bool JOIN::init_ref_access()
1095 {
1096   DBUG_ENTER("JOIN::init_ref_access");
1097   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
1098 
1099   for (uint tableno= const_tables; tableno < tables; tableno++)
1100   {
1101     JOIN_TAB *const tab= best_ref[tableno];
1102 
1103     if (tab->type() == JT_REF) // Here JT_REF means all kinds of ref access
1104     {
1105       assert(tab->position() && tab->position()->key);
1106       if (create_ref_for_key(this, tab, tab->position()->key,
1107                              tab->prefix_tables()))
1108         DBUG_RETURN(true);
1109     }
1110   }
1111 
1112   DBUG_RETURN(false);
1113 }
1114 
1115 
1116 /**
1117   Set the first_sj_inner_tab and last_sj_inner_tab fields for all tables
1118   inside the semijoin nests of the query.
1119 */
set_semijoin_info()1120 void JOIN::set_semijoin_info()
1121 {
1122   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
1123   if (select_lex->sj_nests.is_empty())
1124     return;
1125 
1126   for (uint tableno= const_tables; tableno < tables; )
1127   {
1128     JOIN_TAB *const tab= best_ref[tableno];
1129     const POSITION *const pos= tab->position();
1130 
1131     if (!pos)
1132     {
1133       tableno++;
1134       continue;
1135     }
1136     switch (pos->sj_strategy)
1137     {
1138     case SJ_OPT_NONE:
1139       tableno++;
1140       break;
1141     case SJ_OPT_MATERIALIZE_LOOKUP:
1142     case SJ_OPT_MATERIALIZE_SCAN:
1143     case SJ_OPT_LOOSE_SCAN:
1144     case SJ_OPT_DUPS_WEEDOUT:
1145     case SJ_OPT_FIRST_MATCH:
1146       /*
1147         Remember the first and last semijoin inner tables; this serves to tell
1148         a JOIN_TAB's semijoin strategy (like in setup_join_buffering()).
1149       */
1150       plan_idx last_sj_tab= tableno + pos->n_sj_tables - 1;
1151       plan_idx last_sj_inner=
1152         (pos->sj_strategy == SJ_OPT_DUPS_WEEDOUT) ?
1153         /* Range may end with non-inner table so cannot set last_sj_inner_tab */
1154         NO_PLAN_IDX : last_sj_tab;
1155       for (plan_idx tab_in_range= tableno;
1156            tab_in_range <= last_sj_tab;
1157            tab_in_range++)
1158       {
1159         best_ref[tab_in_range]->set_first_sj_inner(tableno);
1160         best_ref[tab_in_range]->set_last_sj_inner(last_sj_inner);
1161       }
1162       tableno+= pos->n_sj_tables;
1163       break;
1164     }
1165   }
1166 }
1167 
1168 
/**
  Scan the Key_use entries of one (table, key) pair and determine which
  leading key parts are usable for ref access given the available tables.

  @param      keyuse          First Key_use entry for this table/key
  @param      tab             JOIN_TAB owning the index
  @param      key             Index number within tab's table
  @param      used_tables     Bitmap of tables available earlier in the plan
  @param[out] chosen_keyuses  If non-NULL, array filled with the Key_use
                              selected for each usable key part
  @param[out] length_out      Total store length of the selected key parts
  @param[out] keyparts_out    Number of selected key parts
  @param[out] dep_map         If non-NULL, OR-ed with the tables that the
                              selected key values depend on
  @param[out] maybe_null      If non-NULL (required when dep_map is given),
                              OR-ed with whether a nullable ref-or-null
                              key part was selected
*/
void calc_length_and_keyparts(Key_use *keyuse, JOIN_TAB *tab, const uint key,
                              table_map used_tables,Key_use **chosen_keyuses,
                              uint *length_out, uint *keyparts_out,
                              table_map *dep_map, bool *maybe_null)
{
  assert(!dep_map || maybe_null);
  uint keyparts= 0, length= 0;
  uint found_part_ref_or_null= 0;
  KEY *const keyinfo= tab->table()->key_info + key;

  // Key_use entries for one (table, key) are stored contiguously; iterate
  // until the next entry belongs to another table or key.
  do
  {
    /*
      This Key_use is chosen if:
      - it involves a key part at the right place (if index is (a,b) we
      can have a search criterion on 'b' only if we also have a criterion
      on 'a'),
      - it references only tables earlier in the plan.
      Moreover, the execution layer is limited to maximum one ref_or_null
      keypart, as TABLE_REF::null_ref_key is only one byte.
    */
    if (!(~used_tables & keyuse->used_tables) &&
        keyparts == keyuse->keypart &&
        !(found_part_ref_or_null & keyuse->optimize))
    {
      assert(keyparts <= MAX_REF_PARTS);
      if (chosen_keyuses)
        chosen_keyuses[keyparts]= keyuse;
      keyparts++;
      length+= keyinfo->key_part[keyuse->keypart].store_length;
      found_part_ref_or_null|= keyuse->optimize;
      if (dep_map)
      {
        *dep_map|= keyuse->val->used_tables();
        *maybe_null|= keyinfo->key_part[keyuse->keypart].null_bit &&
          (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL);
      }
    }
    keyuse++;
  } while (keyuse->table_ref == tab->table_ref && keyuse->key == key);
  // Caller guarantees at least one usable key part exists.
  assert(keyparts > 0);
  *length_out= length;
  *keyparts_out= keyparts;
}
1213 
1214 
1215 /**
1216   Setup a ref access for looking up rows via an index (a key).
1217 
1218   @param join          The join object being handled
1219   @param j             The join_tab which will have the ref access populated
1220   @param first_keyuse  First key part of (possibly multi-part) key
1221   @param used_tables   Bitmap of available tables
1222 
1223   @return False if success, True if error
1224 
1225   Given a Key_use structure that specifies the fields that can be used
1226   for index access, this function creates and set up the structure
1227   used for index look up via one of the access methods {JT_FT,
1228   JT_CONST, JT_REF_OR_NULL, JT_REF, JT_EQ_REF} for the plan operator
1229   'j'. Generally the function sets up the structure j->ref (of type
1230   TABLE_REF), and the access method j->type.
1231 
1232   @note We cannot setup fields used for ref access before we have sorted
1233         the items within multiple equalities according to the final order of
1234         the tables involved in the join operation. Currently, this occurs in
1235         @see substitute_for_best_equal_field().
1236         The exception is ref access for const tables, which are fixed
1237         before the greedy search planner is invoked.
1238 */
1239 
bool create_ref_for_key(JOIN *join, JOIN_TAB *j, Key_use *org_keyuse,
                        table_map used_tables)
{
  DBUG_ENTER("create_ref_for_key");

  Key_use *keyuse= org_keyuse;
  const uint key= keyuse->key;
  // Full-text lookups are flagged with the special FT_KEYPART value.
  const bool ftkey= (keyuse->keypart == FT_KEYPART);
  THD  *const thd= join->thd;
  uint keyparts, length;
  TABLE *const table= j->table();
  KEY   *const keyinfo= table->key_info+key;
  Key_use *chosen_keyuses[MAX_REF_PARTS];

  assert(j->keys().is_set(org_keyuse->key));

  /* Calculate the length of the used key. */
  if (ftkey)
  {
    Item_func_match *ifm=(Item_func_match *)keyuse->val;

    // Full-text: a single pseudo key part of zero length.
    length=0;
    keyparts=1;
    ifm->get_master()->join_key= 1;
  }
  else /* not ftkey */
    calc_length_and_keyparts(keyuse, j, key, used_tables, chosen_keyuses,
                             &length, &keyparts, NULL, NULL);
  /* set up fieldref */
  j->ref().key_parts=keyparts;
  j->ref().key_length=length;
  j->ref().key=(int) key;
  // Allocate the ref buffers on the THD's memroot; key_buff is double-sized
  // to also host key_buff2 (set below).
  if (!(j->ref().key_buff= (uchar*) thd->mem_calloc(ALIGN_SIZE(length)*2)) ||
      !(j->ref().key_copy= (store_key**) thd->alloc((sizeof(store_key*) *
                                                   (keyparts)))) ||
      !(j->ref().items=    (Item**) thd->alloc(sizeof(Item*)*keyparts)) ||
      !(j->ref().cond_guards= (bool**) thd->alloc(sizeof(uint*)*keyparts)))
  {
    DBUG_RETURN(TRUE);
  }
  j->ref().key_buff2=j->ref().key_buff+ALIGN_SIZE(length);
  j->ref().key_err=1;
  j->ref().has_record= FALSE;
  j->ref().null_rejecting= 0;
  j->ref().use_count= 0;
  j->ref().disable_cache= FALSE;
  keyuse=org_keyuse;

  uchar *key_buff= j->ref().key_buff;
  uchar *null_ref_key= NULL;
  bool keyuse_uses_no_tables= true;
  if (ftkey)
  {
    j->ref().items[0]=((Item_func*)(keyuse->val))->key_item();
    /* Predicates pushed down into subquery can't be used FT access */
    j->ref().cond_guards[0]= NULL;
    if (keyuse->used_tables)
      DBUG_RETURN(TRUE);                        // not supported yet. SerG

    j->set_type(JT_FT);
    j->set_ft_func((Item_func_match *)keyuse->val);
    memset(j->ref().key_copy, 0, sizeof(j->ref().key_copy[0]) * keyparts);
  }
  else
  {
    // Set up TABLE_REF based on chosen Key_use-s.
    for (uint part_no= 0 ; part_no < keyparts ; part_no++)
    {
      keyuse= chosen_keyuses[part_no];
      uint maybe_null= MY_TEST(keyinfo->key_part[part_no].null_bit);

      if (keyuse->val->type() == Item::FIELD_ITEM)
      {
        // Look up the most appropriate field to base the ref access on.
        keyuse->val= get_best_field(static_cast<Item_field *>(keyuse->val),
                                    join->cond_equal);
        keyuse->used_tables= keyuse->val->used_tables();
      }
      j->ref().items[part_no]=keyuse->val;        // Save for cond removal
      j->ref().cond_guards[part_no]= keyuse->cond_guard;
      if (keyuse->null_rejecting)
        j->ref().null_rejecting|= (key_part_map)1 << part_no;
      keyuse_uses_no_tables= keyuse_uses_no_tables && !keyuse->used_tables;

      // Build the store_key that copies the lookup value into key_buff.
      store_key* s_key= get_store_key(thd,
                                      keyuse,join->const_table_map,
                                      &keyinfo->key_part[part_no],
                                      key_buff, maybe_null);
      if (unlikely(!s_key || thd->is_fatal_error))
        DBUG_RETURN(TRUE);

      if (keyuse->used_tables)
        /* Comparing against a non-constant. */
        j->ref().key_copy[part_no]= s_key;
      else
      {
        /**
           The outer reference is to a const table, so we copy the value
           straight from that table now (during optimization), instead of from
           the temporary table created during execution.

           TODO: Synchronize with the temporary table creation code, so that
           there is no need to create a column for this value.
        */
        bool dummy_value= false;
        keyuse->val->walk(&Item::repoint_const_outer_ref,
                          Item::WALK_PREFIX,
                          pointer_cast<uchar*>(&dummy_value));
        /*
          key is const, copy value now and possibly skip it while ::exec().

          Note:
            Result check of store_key::copy() is unnecessary,
            it could be an error returned by store_key::copy() method
            but stored value is not null and default value could be used
            in this case. Methods which used for storing the value
            should be responsible for proper null value setting
            in case of an error. Thus it's enough to check s_key->null_key
            value only.
        */
        (void) s_key->copy();
        /*
          It should be reevaluated in ::exec() if
          constant evaluated to NULL value which we might need to
          handle as a special case during JOIN::exec()
          (As in : 'Full scan on NULL key')
        */
        if (s_key->null_key)
          j->ref().key_copy[part_no]= s_key; // Reevaluate in JOIN::exec()
        else
          j->ref().key_copy[part_no]= NULL;
      }
      /*
        Remember if we are going to use REF_OR_NULL
        But only if field _really_ can be null i.e. we force JT_REF
        instead of JT_REF_OR_NULL in case if field can't be null
      */
      if ((keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL) && maybe_null)
      {
        assert(null_ref_key == NULL); // or we would overwrite it below
        null_ref_key= key_buff;
      }
      key_buff+=keyinfo->key_part[part_no].store_length;
    }
  } /* not ftkey */
  // Finally, classify the access method based on what was collected above.
  if (j->type() == JT_FT)
    DBUG_RETURN(false);
  if (j->type() == JT_CONST)
    j->table()->const_table= 1;
  else if (((actual_key_flags(keyinfo) &
             (HA_NOSAME | HA_NULL_PART_KEY)) != HA_NOSAME) ||
           keyparts != actual_key_parts(keyinfo) || null_ref_key)
  {
    /* Must read with repeat */
    j->set_type(null_ref_key ? JT_REF_OR_NULL : JT_REF);
    j->ref().null_ref_key= null_ref_key;
  }
  else if (keyuse_uses_no_tables &&
           !(table->file->ha_table_flags() & HA_BLOCK_CONST_TABLE))
  {
    /*
      This happen if we are using a constant expression in the ON part
      of an LEFT JOIN.
      SELECT * FROM a LEFT JOIN b ON b.key=30
      Here we should not mark the table as a 'const' as a field may
      have a 'normal' value or a NULL value.
    */
    j->set_type(JT_CONST);
    j->position()->rows_fetched= 1.0;
  }
  else
  {
    // Unique key fully covered by constants from earlier tables: eq_ref.
    j->set_type(JT_EQ_REF);
    j->position()->rows_fetched= 1.0;
  }

  DBUG_RETURN(false);
}
1418 
1419 
1420 
1421 static store_key *
get_store_key(THD * thd,Key_use * keyuse,table_map used_tables,KEY_PART_INFO * key_part,uchar * key_buff,uint maybe_null)1422 get_store_key(THD *thd, Key_use *keyuse, table_map used_tables,
1423               KEY_PART_INFO *key_part, uchar *key_buff, uint maybe_null)
1424 {
1425   if (!((~used_tables) & keyuse->used_tables))		// if const item
1426   {
1427     return new store_key_const_item(thd,
1428                                     key_part->field,
1429                                     key_buff + maybe_null,
1430                                     maybe_null ? key_buff : 0,
1431                                     key_part->length,
1432                                     keyuse->val);
1433   }
1434 
1435   Item_field *field_item= NULL;
1436   if (keyuse->val->type() == Item::FIELD_ITEM)
1437     field_item= static_cast<Item_field*>(keyuse->val->real_item());
1438   else if (keyuse->val->type() == Item::REF_ITEM)
1439   {
1440     Item_ref *item_ref= static_cast<Item_ref*>(keyuse->val);
1441     if (item_ref->ref_type() == Item_ref::OUTER_REF)
1442     {
1443       if ((*item_ref->ref)->type() == Item::FIELD_ITEM)
1444         field_item= static_cast<Item_field*>(item_ref->real_item());
1445       else if ((*(Item_ref**)(item_ref)->ref)->ref_type()
1446                == Item_ref::DIRECT_REF
1447                &&
1448                item_ref->real_item()->type() == Item::FIELD_ITEM)
1449         field_item= static_cast<Item_field*>(item_ref->real_item());
1450     }
1451   }
1452   if (field_item)
1453     return new store_key_field(thd,
1454                                key_part->field,
1455                                key_buff + maybe_null,
1456                                maybe_null ? key_buff : 0,
1457                                key_part->length,
1458                                field_item->field,
1459                                keyuse->val->full_name());
1460 
1461   return new store_key_item(thd,
1462                             key_part->field,
1463                             key_buff + maybe_null,
1464                             maybe_null ? key_buff : 0,
1465                             key_part->length,
1466                             keyuse->val);
1467 }
1468 
1469 
copy_inner()1470 enum store_key::store_key_result store_key_hash_item::copy_inner()
1471 {
1472   enum store_key_result res= store_key_item::copy_inner();
1473   if (res != STORE_KEY_FATAL)
1474     *hash= unique_hash(to_field, hash);
1475   return res;
1476 }
1477 
1478 
1479 /**
1480   Extend e1 by AND'ing e2 to the condition e1 points to. The resulting
1481   condition is fixed. Requirement: the input Items must already have
1482   been fixed.
1483 
1484   @param[in,out]   e1 Pointer to condition that will be extended with e2
1485   @param           e2 Condition that will extend e1
1486 
1487   @retval true   if there was a memory allocation error, in which case
1488                  e1 remains unchanged
1489   @retval false  otherwise
1490 */
1491 
and_conditions(Item ** e1,Item * e2)1492 bool and_conditions(Item **e1, Item *e2)
1493 {
1494   assert(!(*e1) || (*e1)->fixed);
1495   assert(!e2 || e2->fixed);
1496   if (*e1)
1497   {
1498     if (!e2)
1499       return false;
1500     Item *res= new Item_cond_and(*e1, e2);
1501     if (unlikely(!res))
1502       return true;
1503 
1504     *e1= res;
1505     res->quick_fix_field();
1506     res->update_used_tables();
1507 
1508   }
1509   else
1510     *e1= e2;
1511   return false;
1512 }
1513 
1514 
1515 #define ICP_COND_USES_INDEX_ONLY 10
1516 
1517 /*
1518   Get a part of the condition that can be checked using only index fields
1519 
1520   SYNOPSIS
1521     make_cond_for_index()
1522       cond           The source condition
1523       table          The table that is partially available
1524       keyno          The index in the above table. Only fields covered by the index
1525                      are available
1526       other_tbls_ok  TRUE <=> Fields of other non-const tables are allowed
1527 
1528   DESCRIPTION
1529     Get a part of the condition that can be checked when for the given table
1530     we have values only of fields covered by some index. The condition may
1531     refer to other tables, it is assumed that we have values of all of their
1532     fields.
1533 
1534     Example:
1535       make_cond_for_index(
1536          "cond(t1.field) AND cond(t2.key1) AND cond(t2.non_key) AND cond(t2.key2)",
1537           t2, keyno(t2.key1))
1538       will return
1539         "cond(t1.field) AND cond(t2.key2)"
1540 
1541   RETURN
1542     Index condition, or NULL if no condition could be inferred.
1543 */
1544 
static Item *make_cond_for_index(Item *cond, TABLE *table, uint keyno,
                                 bool other_tbls_ok)
{
  assert(cond != NULL);

  if (cond->type() == Item::COND_ITEM)
  {
    // Count of children whose marker says "index fields only"; used below to
    // propagate the marker to this whole AND/OR node.
    uint n_marked= 0;
    if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
    {
      table_map used_tables= 0;
      Item_cond_and *new_cond=new Item_cond_and;
      if (!new_cond)
        return NULL;
      List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
      Item *item;
      // For AND: keep every child that is index-checkable; others are simply
      // dropped (they will be checked by the remainder condition).
      while ((item=li++))
      {
        Item *fix= make_cond_for_index(item, table, keyno, other_tbls_ok);
        if (fix)
        {
          new_cond->argument_list()->push_back(fix);
          used_tables|= fix->used_tables();
        }
        n_marked += MY_TEST(item->marker == ICP_COND_USES_INDEX_ONLY);
      }
      if (n_marked ==((Item_cond*)cond)->argument_list()->elements)
        cond->marker= ICP_COND_USES_INDEX_ONLY;
      switch (new_cond->argument_list()->elements) {
      case 0:
        return NULL;
      case 1:
        // A one-child AND collapses to the child itself.
        new_cond->set_used_tables(used_tables);
        return new_cond->argument_list()->head();
      default:
        new_cond->quick_fix_field();
        new_cond->set_used_tables(used_tables);
        return new_cond;
      }
    }
    else /* It's OR */
    {
      Item_cond_or *new_cond=new Item_cond_or;
      if (!new_cond)
        return NULL;
      List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
      Item *item;
      // For OR: every child must be index-checkable, otherwise the whole
      // disjunction cannot be pushed.
      while ((item=li++))
      {
        Item *fix= make_cond_for_index(item, table, keyno, other_tbls_ok);
        if (!fix)
          return NULL;
        new_cond->argument_list()->push_back(fix);
        n_marked += MY_TEST(item->marker == ICP_COND_USES_INDEX_ONLY);
      }
      if (n_marked ==((Item_cond*)cond)->argument_list()->elements)
        cond->marker= ICP_COND_USES_INDEX_ONLY;
      new_cond->quick_fix_field();
      new_cond->set_used_tables(cond->used_tables());
      new_cond->top_level_item();
      return new_cond;
    }
  }

  // Leaf predicate: keep it only if all its fields are covered by the index.
  if (!uses_index_fields_only(cond, table, keyno, other_tbls_ok))
  {
    /*
      Reset marker since it might have the value
      ICP_COND_USES_INDEX_ONLY if this condition is part of the select
      condition for multiple tables.
    */
    cond->marker= 0;
    return NULL;
  }
  cond->marker= ICP_COND_USES_INDEX_ONLY;
  return cond;
}
1622 
1623 
/**
  Build the part of 'cond' that still has to be checked on full table rows,
  i.e. the part not already guaranteed by the pushed index condition.

  @param cond           The source condition
  @param exclude_index  TRUE <=> skip sub-conditions already marked
                        ICP_COND_USES_INDEX_ONLY by make_cond_for_index()

  @return Remainder condition, or NULL if nothing remains to check.
*/
static Item *make_cond_remainder(Item *cond, bool exclude_index)
{
  if (exclude_index && cond->marker == ICP_COND_USES_INDEX_ONLY)
    return 0; /* Already checked */

  if (cond->type() == Item::COND_ITEM)
  {
    table_map tbl_map= 0;
    if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
    {
      /* Create new top level AND item */
      Item_cond_and *new_cond=new Item_cond_and;
      if (!new_cond)
	return (Item*) 0;
      List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
      Item *item;
      // For AND: children fully handled by the index condition are dropped.
      while ((item=li++))
      {
	Item *fix= make_cond_remainder(item, exclude_index);
	if (fix)
        {
	  new_cond->argument_list()->push_back(fix);
          tbl_map |= fix->used_tables();
        }
      }
      switch (new_cond->argument_list()->elements) {
      case 0:
	return (Item*) 0;
      case 1:
        // A one-child AND collapses to the child itself.
	return new_cond->argument_list()->head();
      default:
	new_cond->quick_fix_field();
        new_cond->set_used_tables(tbl_map);
	return new_cond;
      }
    }
    else /* It's OR */
    {
      Item_cond_or *new_cond=new Item_cond_or;
      if (!new_cond)
	return (Item*) 0;
      List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
      Item *item;
      // For OR: every child must be kept (exclude_index=FALSE), since
      // dropping any disjunct would change the result.
      while ((item=li++))
      {
	Item *fix= make_cond_remainder(item, FALSE);
	if (!fix)
	  return (Item*) 0;
	new_cond->argument_list()->push_back(fix);
        tbl_map |= fix->used_tables();
      }
      new_cond->quick_fix_field();
      new_cond->set_used_tables(tbl_map);
      new_cond->top_level_item();
      return new_cond;
    }
  }
  // Leaf predicate: kept as-is (it was not excluded above).
  return cond;
}
1683 
1684 
1685 /**
1686   Try to extract and push the index condition down to table handler
1687 
1688   @param  join_tab       join_tab for table
1689   @param  keyno          Index for which extract and push the condition
1690   @param  trace_obj      trace object where information is to be added
1691 */
void QEP_TAB::push_index_cond(const JOIN_TAB *join_tab,
                              uint keyno,
                              Opt_trace_object *trace_obj)
{
  JOIN *const join_= join();
  DBUG_ENTER("push_index_cond");

  ASSERT_BEST_REF_IN_JOIN_ORDER(join_);
  assert(join_tab == join_->best_ref[idx()]);

  if (join_tab->reversed_access) // @todo: historical limitation, lift it!
    DBUG_VOID_RETURN;

  TABLE *const tbl= table();

  // Disable ICP for Innodb intrinsic temp table because of performance
  if (internal_tmp_disk_storage_engine == TMP_TABLE_INNODB &&
      tbl->s->db_type() == innodb_hton &&
      tbl->s->tmp_table != NO_TMP_TABLE &&
      tbl->s->tmp_table != TRANSACTIONAL_TMP_TABLE)
    DBUG_VOID_RETURN;

  // TODO: Currently, index on virtual generated column doesn't support ICP
  if (tbl->vfield && tbl->index_contains_some_virtual_gcol(keyno))
    DBUG_VOID_RETURN;

  /*
    Fields of other non-const tables aren't allowed in following cases:
       type is:
        (JT_ALL | JT_INDEX_SCAN | JT_RANGE | JT_INDEX_MERGE)
       and BNL is used.
    and allowed otherwise.
  */
  const bool other_tbls_ok=
    !((type() == JT_ALL || type() == JT_INDEX_SCAN ||
       type() == JT_RANGE || type() ==  JT_INDEX_MERGE) &&
      join_tab->use_join_cache() == JOIN_CACHE::ALG_BNL);

  /*
    We will only attempt to push down an index condition when the
    following criteria are true:
    0. The table has a select condition
    1. The storage engine supports ICP.
    2. The index_condition_pushdown switch is on and
       the use of ICP is not disabled by the NO_ICP hint.
    3. The query is not a multi-table update or delete statement. The reason
       for this requirement is that the same handler will be used
       both for doing the select/join and the update. The pushed index
       condition might then also be applied by the storage engine
       when doing the update part and result in either not finding
       the record to update or updating the wrong record.
    4. The JOIN_TAB is not part of a subquery that has guarded conditions
       that can be turned on or off during execution of a 'Full scan on NULL
       key'.
       @see Item_in_optimizer::val_int()
       @see subselect_single_select_engine::exec()
       @see TABLE_REF::cond_guards
       @see setup_join_buffering
    5. The join type is not CONST or SYSTEM. The reason for excluding
       these join types, is that these are optimized to only read the
       record once from the storage engine and later re-use it. In a
       join where a pushed index condition evaluates fields from
       tables earlier in the join sequence, the pushed condition would
       only be evaluated the first time the record value was needed.
    6. The index is not a clustered index. The performance improvement
       of pushing an index condition on a clustered key is much lower
       than on a non-clustered key. This restriction should be
       re-evaluated when WL#6061 is implemented.
    7. The index on virtual generated columns is not supported for ICP.
  */
  if (condition() &&
      tbl->file->index_flags(keyno, 0, 1) &
      HA_DO_INDEX_COND_PUSHDOWN &&
      hint_key_state(join_->thd, tbl, keyno, ICP_HINT_ENUM,
                     OPTIMIZER_SWITCH_INDEX_CONDITION_PUSHDOWN) &&
      join_->thd->lex->sql_command != SQLCOM_UPDATE_MULTI &&
      join_->thd->lex->sql_command != SQLCOM_DELETE_MULTI &&
      !has_guarded_conds() &&
      type() != JT_CONST && type() != JT_SYSTEM &&
      !(keyno == tbl->s->primary_key &&
        tbl->file->primary_key_is_clustered()))
  {
    DBUG_EXECUTE("where", print_where(condition(), "full cond",
                 QT_ORDINARY););
    // Split out the part of the condition that only touches indexed columns.
    Item *idx_cond= make_cond_for_index(condition(), tbl,
                                        keyno, other_tbls_ok);
    DBUG_EXECUTE("where", print_where(idx_cond, "idx cond", QT_ORDINARY););
    if (idx_cond)
    {
      /*
        Check that the condition to push actually contains fields from
        the index. Without any fields from the index it is unlikely
        that it will filter out any records since the conditions on
        fields from other tables in most cases have already been
        evaluated.
      */
      idx_cond->update_used_tables();
      if ((idx_cond->used_tables() & table_ref->map()) == 0)
      {
        /*
          The following assert is to check that we only skip pushing the
          index condition for the following situations:
          1. We actually are allowed to generate an index condition on another
             table.
          2. The index condition is a constant item.
          3. The index condition contains an updatable user variable
             (test this by checking that the RAND_TABLE_BIT is set).
        */
        assert(other_tbls_ok ||                                  // 1
               idx_cond->const_item() ||                         // 2
               (idx_cond->used_tables() & RAND_TABLE_BIT) );     // 3
        DBUG_VOID_RETURN;
      }

      /*
        Part of idx_cond that the storage engine did not accept and which
        therefore must still be evaluated on the server side. Stays NULL
        when the whole condition is handled via the BKA cache below.
      */
      Item *idx_remainder_cond= 0;

      /*
        For BKA cache we store condition to special BKA cache field
        because evaluation of the condition requires additional operations
        before the evaluation. This condition is used in
        JOIN_CACHE_BKA[_UNIQUE]::skip_index_tuple() functions.
      */
      if (join_tab->use_join_cache() &&
          /*
            if cache is used then the value is TRUE only
            for BKA[_UNIQUE] cache (see setup_join_buffering() func).
            In this case other_tbls_ok is an equivalent of
            cache->is_key_access().
          */
          other_tbls_ok &&
          (idx_cond->used_tables() &
           ~(table_ref->map() | join_->const_table_map)))
      {
        cache_idx_cond= idx_cond;
        trace_obj->add("pushed_to_BKA", true);
      }
      else
      {
        // The engine may keep part of the condition; the rest comes back.
        idx_remainder_cond= tbl->file->idx_cond_push(keyno, idx_cond);
        DBUG_EXECUTE("where",
                     print_where(tbl->file->pushed_idx_cond, "icp cond",
                                 QT_ORDINARY););
      }
      /*
        Disable eq_ref's "lookup cache" if we've pushed down an index
        condition.
        TODO: This check happens to work on current ICP implementations, but
        there may exist a compliant implementation that will not work
        correctly with it. Sort this out when we stabilize the condition
        pushdown APIs.
      */
      if (idx_remainder_cond != idx_cond)
      {
        ref().disable_cache= TRUE;
        trace_obj->add("pushed_index_condition", idx_cond);
      }

      // The part of the original condition not covered by the index must
      // still be evaluated against the full row.
      Item *row_cond= make_cond_remainder(condition(), TRUE);
      DBUG_EXECUTE("where", print_where(row_cond, "remainder cond",
                   QT_ORDINARY););

      if (row_cond)
      {
        if (idx_remainder_cond)
          and_conditions(&row_cond, idx_remainder_cond);
        idx_remainder_cond= row_cond;
      }
      // The attached condition is now only what the engine cannot check.
      set_condition(idx_remainder_cond);
      trace_obj->add("table_condition_attached", idx_remainder_cond);
    }
  }
  DBUG_VOID_RETURN;
}
1865 
1866 
1867 /**
1868   Setup the materialized table for a semi-join nest
1869 
1870   @param tab       join_tab for the materialized semi-join table
1871   @param tableno   table number of materialized table
1872   @param inner_pos information about the first inner table of the subquery
1873   @param sjm_pos   information about the materialized semi-join table,
1874                    to be filled with data.
1875 
1876   @details
1877     Setup execution structures for one semi-join materialization nest:
1878     - Create the materialization temporary table, including TABLE_LIST object.
1879     - Create a list of Item_field objects per column in the temporary table.
1880     - Create a keyuse array describing index lookups into the table
1881       (for MaterializeLookup)
1882 
1883   @return False if OK, True if error
1884 */
1885 
setup_semijoin_materialized_table(JOIN_TAB * tab,uint tableno,const POSITION * inner_pos,POSITION * sjm_pos)1886 bool JOIN::setup_semijoin_materialized_table(JOIN_TAB *tab, uint tableno,
1887                                              const POSITION *inner_pos,
1888                                              POSITION *sjm_pos)
1889 {
1890   DBUG_ENTER("JOIN::setup_semijoin_materialized_table");
1891   const TABLE_LIST *const emb_sj_nest= inner_pos->table->emb_sj_nest;
1892   Semijoin_mat_optimize *const sjm_opt= &emb_sj_nest->nested_join->sjm;
1893   Semijoin_mat_exec *const sjm_exec= tab->sj_mat_exec();
1894   const uint field_count= emb_sj_nest->nested_join->sj_inner_exprs.elements;
1895 
1896   assert(inner_pos->sj_strategy == SJ_OPT_MATERIALIZE_LOOKUP ||
1897          inner_pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN);
1898 
1899   /*
1900     Set up the table to write to, do as
1901     Query_result_union::create_result_table does
1902   */
1903   sjm_exec->table_param= Temp_table_param();
1904   count_field_types(select_lex, &sjm_exec->table_param,
1905                     emb_sj_nest->nested_join->sj_inner_exprs, false, true);
1906   sjm_exec->table_param.bit_fields_as_long= true;
1907 
1908   char buffer[NAME_LEN];
1909   const size_t len= my_snprintf(buffer, sizeof(buffer) - 1, "<subquery%u>",
1910                                 emb_sj_nest->nested_join->query_block_id);
1911   char *name= (char *)alloc_root(thd->mem_root, len + 1);
1912   if (name == NULL)
1913     DBUG_RETURN(true); /* purecov: inspected */
1914 
1915   memcpy(name, buffer, len);
1916   name[len] = '\0';
1917   TABLE *table;
1918   if (!(table= create_tmp_table(thd, &sjm_exec->table_param,
1919                                 emb_sj_nest->nested_join->sj_inner_exprs,
1920                                 NULL,
1921                                 true /* distinct */,
1922                                 true /* save_sum_fields */,
1923                                 thd->variables.option_bits |
1924                                 TMP_TABLE_ALL_COLUMNS,
1925                                 HA_POS_ERROR /* rows_limit */,
1926                                 name)))
1927     DBUG_RETURN(true); /* purecov: inspected */
1928   sjm_exec->table= table;
1929   map2table[tableno]= tab;
1930   table->file->extra(HA_EXTRA_WRITE_CACHE);
1931   table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
1932   sj_tmp_tables.push_back(table);
1933   sjm_exec_list.push_back(sjm_exec);
1934 
1935   /*
1936     Hash_field is not applicable for MATERIALIZE_LOOKUP. If hash_field is
1937     created for temporary table, semijoin_types_allow_materialization must
1938     assure that MATERIALIZE_LOOKUP can't be chosen.
1939   */
1940   assert((inner_pos->sj_strategy == SJ_OPT_MATERIALIZE_LOOKUP &&
1941           !table->hash_field) ||
1942          inner_pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN);
1943 
1944   TABLE_LIST *tl;
1945   if (!(tl= (TABLE_LIST *) alloc_root(thd->mem_root, sizeof(TABLE_LIST))))
1946     DBUG_RETURN(true);            /* purecov: inspected */
1947   // TODO: May have to setup outer-join info for this TABLE_LIST !!!
1948 
1949   tl->init_one_table("", 0, name, strlen(name), name, TL_IGNORE);
1950 
1951   tl->table= table;
1952   tl->set_tableno(tableno);
1953 
1954   table->pos_in_table_list= tl;
1955 
1956   if (!(sjm_opt->mat_fields=
1957     (Item_field **) alloc_root(thd->mem_root,
1958                                field_count * sizeof(Item_field **))))
1959     DBUG_RETURN(true);
1960 
1961   for (uint fieldno= 0; fieldno < field_count; fieldno++)
1962   {
1963     if (!(sjm_opt->mat_fields[fieldno]=
1964           new Item_field(table->visible_field_ptr()[fieldno])))
1965       DBUG_RETURN(true);
1966   }
1967 
1968   tab->table_ref= tl;
1969   tab->set_table(table);
1970   tab->set_position(sjm_pos);
1971 
1972   tab->worst_seeks= 1.0;
1973   tab->set_records((ha_rows)emb_sj_nest->nested_join->sjm.expected_rowcount);
1974 
1975   tab->found_records= tab->records();
1976   tab->read_time= (ha_rows)emb_sj_nest->nested_join->sjm.scan_cost.total_cost();
1977 
1978   tab->init_join_cond_ref(tl);
1979 
1980   table->keys_in_use_for_query.set_all();
1981   sjm_pos->table= tab;
1982   sjm_pos->sj_strategy= SJ_OPT_NONE;
1983 
1984   sjm_pos->use_join_buffer= false;
1985   /*
1986     No need to recalculate filter_effect since there are no post-read
1987     conditions for materialized tables.
1988   */
1989   sjm_pos->filter_effect= 1.0;
1990 
1991   /*
1992     Key_use objects are required so that create_ref_for_key() can set up
1993     a proper ref access for this table.
1994   */
1995   Key_use_array *keyuse=
1996    create_keyuse_for_table(thd, table, field_count, sjm_opt->mat_fields,
1997                            emb_sj_nest->nested_join->sj_outer_exprs);
1998   if (!keyuse)
1999     DBUG_RETURN(true);
2000 
2001   double fanout= ((uint)tab->idx() == const_tables) ?
2002     1.0 : best_ref[tab->idx() - 1]->position()->prefix_rowcount;
2003   if (!sjm_exec->is_scan)
2004   {
2005     sjm_pos->key= keyuse->begin(); // MaterializeLookup will use the index
2006     sjm_pos->read_cost= emb_sj_nest->nested_join->sjm.lookup_cost.total_cost() *
2007                         fanout;
2008     tab->set_keyuse(keyuse->begin());
2009     tab->keys().set_bit(0);          // There is one index - use it always
2010     tab->set_index(0);
2011     sjm_pos->rows_fetched= 1.0;
2012     tab->set_type(JT_REF);
2013   }
2014   else
2015   {
2016     sjm_pos->key= NULL; // No index use for MaterializeScan
2017     sjm_pos->read_cost= tab->read_time * fanout;
2018     sjm_pos->rows_fetched= static_cast<double>(tab->records());
2019     tab->set_type(JT_ALL);
2020   }
2021   sjm_pos->set_prefix_join_cost((tab - join_tab), cost_model());
2022 
2023   DBUG_RETURN(false);
2024 }
2025 
2026 
2027 /**
2028   A helper function that allocates appropriate join cache object and
2029   sets next_select function of previous tab.
2030 */
2031 
void QEP_TAB::init_join_cache(JOIN_TAB *join_tab)
{
  JOIN *const join_= join();
  assert(idx() > 0);
  ASSERT_BEST_REF_IN_JOIN_ORDER(join_);
  assert(join_tab == join_->best_ref[idx()]);

  JOIN_CACHE *prev_cache= NULL;
  if ((uint)idx() > join_->const_tables)
  {
    QEP_TAB *prev_tab= this - 1;
    /*
      Link with the previous join cache, but make sure that we do not link
      join caches of two different operations when the previous operation was
      MaterializeLookup or MaterializeScan, ie if:
       1. the previous join_tab has join buffering enabled, and
       2. the previous join_tab belongs to a materialized semi-join nest, and
       3. this join_tab represents a regular table, or is part of a different
          semi-join interval than the previous join_tab.
    */
    prev_cache= (JOIN_CACHE*)prev_tab->op;
    if (prev_cache != NULL &&                                       // 1
        sj_is_materialize_strategy(prev_tab->get_sj_strategy()) &&   // 2
        first_sj_inner() != prev_tab->first_sj_inner())     // 3
      prev_cache= NULL;
  }
  // Allocate the join-cache object matching the algorithm the planner chose.
  switch (join_tab->use_join_cache())
  {
  case JOIN_CACHE::ALG_BNL:
    op= new JOIN_CACHE_BNL(join_, this, prev_cache);
    break;
  case JOIN_CACHE::ALG_BKA:
    op= new JOIN_CACHE_BKA(join_, this, join_tab->join_cache_flags, prev_cache);
    break;
  case JOIN_CACHE::ALG_BKA_UNIQUE:
    op= new JOIN_CACHE_BKA_UNIQUE(join_, this, join_tab->join_cache_flags, prev_cache);
    break;
  default:
    assert(0);

  }
  DBUG_EXECUTE_IF("jb_alloc_with_prev_fail",
                  if (prev_cache)
                  {
                    DBUG_SET("+d,jb_alloc_fail");
                    DBUG_SET("-d,jb_alloc_with_prev_fail");
                  });
  if (!op || op->init())
  {
    /*
      OOM. If it's in creation of "op" it has thrown error.
      If it's in init() (allocation of the join buffer) it has not,
      and there's a chance to execute the query:
      we remove this join buffer, and all others (as there may be
      dependencies due to outer joins).
      @todo Consider sending a notification of this problem (a warning to the
      client, or to the error log).
    */
    for (uint i= join_->const_tables; i < join_->tables; i++)
    {
      QEP_TAB *const q= &join_->qep_tab[i];
      if (!q->position())
        continue;
      JOIN_TAB *const t= join_->best_ref[i];
      if (t->use_join_cache() == JOIN_CACHE::ALG_NONE)
        continue;
      t->set_use_join_cache(JOIN_CACHE::ALG_NONE);
      /*
        Detach the join buffer from QEP_TAB so that EXPLAIN doesn't show
        'Using join buffer'. Destroy the join buffer.
      */
      if (q->op)
      {
        q->op->mem_free();
        delete q->op;
        q->op= NULL;
      }
      assert(i > 0);
      /*
        Make the immediately preceding QEP_TAB channel the work to the
        non-buffered nested loop algorithm:
      */
      q[-1].next_select= sub_select;
    }
  }
  else
    // Success: the preceding table feeds rows through the buffered operation.
    this[-1].next_select= sub_select_op;
}
2120 
2121 
2122 /**
2123   Plan refinement stage: do various setup things for the executor
2124 
2125   @param join          Join being processed
2126   @param no_jbuf_after Don't use join buffering after table with this number.
2127 
2128   @return false if successful, true if error (Out of memory)
2129 
2130   @details
2131     Plan refinement stage: do various set ups for the executioner
2132       - setup join buffering use
2133       - push index conditions
2134       - increment relevant counters
2135       - etc
2136 */
2137 
bool
make_join_readinfo(JOIN *join, uint no_jbuf_after)
{
  // Status counters are only maintained for real executions, not EXPLAIN.
  const bool statistics= !join->thd->lex->is_explain();
  // Row positions must be fetchable if a tmp table / sort / distinct follows.
  const bool prep_for_pos= join->need_tmp || join->select_distinct ||
                           join->group_list || join->order;

  DBUG_ENTER("make_join_readinfo");
  ASSERT_BEST_REF_IN_JOIN_ORDER(join);

  Opt_trace_context * const trace= &join->thd->opt_trace;
  Opt_trace_object wrapper(trace);
  Opt_trace_array trace_refine_plan(trace, "refine_plan");

  if (setup_semijoin_dups_elimination(join, no_jbuf_after))
    DBUG_RETURN(TRUE); /* purecov: inspected */


  for (uint i= join->const_tables; i < join->tables; i++)
  {
    QEP_TAB *const qep_tab= &join->qep_tab[i];

    if (!qep_tab->position())
      continue;

    JOIN_TAB *const tab= join->best_ref[i];
    TABLE    *const table= qep_tab->table();
    /*
     Need to tell handlers that to play it safe, it should fetch all
     columns of the primary key of the tables: this is because MySQL may
     build row pointers for the rows, and for all columns of the primary key
     the read set has not necessarily been set by the server code.
    */
    if (prep_for_pos)
      table->prepare_for_position();

    // Reset per-table execution state before filling it in below.
    qep_tab->read_record.table= table;
    qep_tab->next_select=sub_select;		/* normal select */
    qep_tab->cache_idx_cond= NULL;
    table->status= STATUS_GARBAGE | STATUS_NOT_FOUND;
    assert(!qep_tab->read_first_record);
    qep_tab->read_record.read_record= NULL;
    qep_tab->read_record.unlock_row= rr_unlock_row;

    Opt_trace_object trace_refine_table(trace);
    trace_refine_table.add_utf8_table(qep_tab->table_ref);

    if (qep_tab->do_loosescan())
    {
      // Buffer holding the current loose-scan group's key prefix.
      if (!(qep_tab->loosescan_buf= (uchar*)join->thd->alloc(qep_tab->loosescan_key_len)))
        DBUG_RETURN(TRUE); /* purecov: inspected */
    }

    if (tab->use_join_cache() != JOIN_CACHE::ALG_NONE)
      qep_tab->init_join_cache(tab);

    // Access-method specific setup and statistics.
    switch (qep_tab->type()) {
    case JT_EQ_REF:
    case JT_REF_OR_NULL:
    case JT_REF:
    case JT_SYSTEM:
    case JT_CONST:
      // Prefer an index-only read; otherwise try index condition pushdown.
      if (table->covering_keys.is_set(qep_tab->ref().key) &&
          !table->no_keyread)
        table->set_keyread(TRUE);
      else
        qep_tab->push_index_cond(tab, qep_tab->ref().key, &trace_refine_table);
      break;
    case JT_ALL:
      join->thd->set_status_no_index_used();
      /* Fall through */
    case JT_INDEX_SCAN:
      if (tab->position()->filter_effect != COND_FILTER_STALE_NO_CONST &&
          !tab->sj_mat_exec())
      {
        /*
          rows_w_const_cond is # of rows which will be read by the access
          method, minus those which will not pass the constant condition;
          that's how calculate_scan_cost() works. Such number is useful inside
          the planner, but obscure to the reader of EXPLAIN; so we put the
          real count of read rows into rows_fetched, and move the constant
          condition's filter to filter_effect.
        */
        double rows_w_const_cond= qep_tab->position()->rows_fetched;
        table->pos_in_table_list->fetch_number_of_rows();
        tab->position()->rows_fetched=
          static_cast<double>(table->file->stats.records);
        if (tab->position()->filter_effect != COND_FILTER_STALE)
        {
          // Constant condition moves to filter_effect:
          if (tab->position()->rows_fetched == 0) // avoid division by zero
            tab->position()->filter_effect= 0.0f;
          else
            tab->position()->filter_effect*=
              static_cast<float>(rows_w_const_cond/tab->position()->rows_fetched);
        }
      }
      if (tab->use_quick == QS_DYNAMIC_RANGE)
      {
	join->thd->set_status_no_good_index_used();
	if (statistics)
	  join->thd->inc_status_select_range_check();
      }
      else
      {
        if (statistics)
        {
          if (i == join->const_tables)
            join->thd->inc_status_select_scan();
          else
            join->thd->inc_status_select_full_join();
        }
      }
      break;
    case JT_RANGE:
    case JT_INDEX_MERGE:
      if (statistics)
      {
        if (i == join->const_tables)
          join->thd->inc_status_select_range();
        else
          join->thd->inc_status_select_full_range_join();
      }
      if (!table->no_keyread && qep_tab->type() == JT_RANGE)
      {
        if (table->covering_keys.is_set(qep_tab->quick()->index))
        {
          assert(qep_tab->quick()->index != MAX_KEY);
          table->set_keyread(TRUE);
        }
        // ICP is only attempted when the index is not already covering.
        if (!table->key_read)
          qep_tab->push_index_cond(tab, qep_tab->quick()->index,
                                   &trace_refine_table);
      }
      // As for JT_ALL above: show real row counts in EXPLAIN, move the
      // constant condition's selectivity into filter_effect.
      if (tab->position()->filter_effect != COND_FILTER_STALE_NO_CONST)
      {
        double rows_w_const_cond= qep_tab->position()->rows_fetched;
        qep_tab->position()->rows_fetched= rows2double(tab->quick()->records);
        if (tab->position()->filter_effect != COND_FILTER_STALE)
        {
          // Constant condition moves to filter_effect:
          if (tab->position()->rows_fetched == 0) // avoid division by zero
            tab->position()->filter_effect= 0.0f;
          else
            tab->position()->filter_effect*=
              static_cast<float>(rows_w_const_cond/tab->position()->rows_fetched);
        }
      }
      break;
    case JT_FT:
      if (tab->join()->fts_index_access(tab))
      {
        // Full-text index can satisfy the query without reading rows.
        table->set_keyread(true);
        table->covering_keys.set_bit(tab->ft_func()->key);
      }
      break;
    default:
      DBUG_PRINT("error",("Table type %d found",qep_tab->type())); /* purecov: deadcode */
      assert(0);
      break;					/* purecov: deadcode */
    }

    if (tab->position()->filter_effect <= COND_FILTER_STALE)
    {
      /*
        Give a proper value for EXPLAIN.
        For performance reasons, we do not recalculate the filter for
        non-EXPLAIN queries; thus, EXPLAIN CONNECTION may show 100%
        for a query.
      */
      tab->position()->filter_effect=
        join->thd->lex->describe ?
        calculate_condition_filter(tab,
                                   (tab->ref().key != -1) ? tab->position()->key : NULL,
                                   tab->prefix_tables() & ~tab->table_ref->map(),
                                   tab->position()->rows_fetched,
                                   false) : COND_FILTER_ALLPASS;
    }

    qep_tab->pick_table_access_method(tab);

    // Materialize derived tables prior to accessing them.
    if (tab->table_ref->uses_materialization())
      qep_tab->materialize_table= join_materialize_derived;

    if (qep_tab->sj_mat_exec())
      qep_tab->materialize_table= join_materialize_semijoin;
  }

  DBUG_RETURN(FALSE);
}
2329 
2330 
2331 /**
2332   Give error if we some tables are done with a full join.
2333 
2334   This is used by multi_table_update and multi_table_delete when running
2335   in safe mode.
2336 
2337   @param join		Join condition
2338 
2339   @retval
2340     0	ok
2341   @retval
2342     1	Error (full join used)
2343 */
2344 
error_if_full_join(JOIN * join)2345 bool error_if_full_join(JOIN *join)
2346 {
2347   ASSERT_BEST_REF_IN_JOIN_ORDER(join);
2348   for (uint i= 0; i < join->primary_tables; i++)
2349   {
2350     JOIN_TAB *const tab= join->best_ref[i];
2351     THD *thd = join->thd;
2352 
2353     /*
2354       Safe update error isn't returned if:
2355       1) It is  an EXPLAIN statement OR
2356       2) Table is not the target.
2357 
2358       Append the first warning (if any) to the error message. Allows the user
2359       to understand why index access couldn't be chosen.
2360     */
2361 
2362     if (!thd->lex->is_explain() && tab->table()->pos_in_table_list->updating &&
2363         tab->type() == JT_ALL)
2364     {
2365       my_error(ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE, MYF(0),
2366                thd->get_stmt_da()->get_first_condition_message());
2367       return true;
2368     }
2369   }
2370   return false;
2371 }
2372 
2373 
2374 /**
2375   Cleanup table of join operation.
2376 */
2377 
cleanup()2378 void JOIN_TAB::cleanup()
2379 {
2380   // Delete parts specific of JOIN_TAB:
2381 
2382   if (table())
2383     table()->reginfo.join_tab= NULL;
2384 
2385   // Delete shared parts:
2386   if (join()->qep_tab)
2387   {
2388     // deletion will be done by QEP_TAB
2389   }
2390   else
2391     qs_cleanup();
2392 
2393   TRASH(this, sizeof(*this));
2394 }
2395 
2396 
cleanup()2397 void QEP_TAB::cleanup()
2398 {
2399   // Delete parts specific of QEP_TAB:
2400   delete filesort;
2401   filesort= NULL;
2402   end_read_record(&read_record);
2403   if (quick_optim() != quick())
2404     delete quick_optim();
2405 
2406   TABLE *const t= table();
2407 
2408   if (t)
2409     t->reginfo.qep_tab= NULL;
2410 
2411   // Delete shared parts:
2412   qs_cleanup();
2413 
2414   // Order of qs_cleanup() and this, matters:
2415   if (op)
2416   {
2417     if (op->type() == QEP_operation::OT_TMP_TABLE)
2418     {
2419       if (t) // Check tmp table is not yet freed.
2420         free_tmp_table(current_thd, t);
2421       delete tmp_table_param;
2422       tmp_table_param= NULL;
2423     }
2424     op->mem_free();
2425   }
2426 
2427   TRASH(this, sizeof(*this));
2428 }
2429 
2430 
qs_cleanup()2431 void QEP_shared_owner::qs_cleanup()
2432 {
2433   /* Skip non-existing derived tables/views result tables */
2434   if (table() &&
2435       (table()->s->tmp_table != INTERNAL_TMP_TABLE || table()->is_created()))
2436   {
2437     table()->set_keyread(FALSE);
2438     table()->file->ha_index_or_rnd_end();
2439     free_io_cache(table());
2440     filesort_free_buffers(table(), true);
2441     TABLE_LIST *const table_ref= table()->pos_in_table_list;
2442     if (table_ref)
2443     {
2444       table_ref->derived_keys_ready= false;
2445       table_ref->derived_key_list.empty();
2446     }
2447   }
2448   delete quick();
2449   TRASH(this, sizeof(*this));
2450 }
2451 
2452 
sjm_query_block_id() const2453 uint QEP_TAB::sjm_query_block_id() const
2454 {
2455   assert(sj_is_materialize_strategy(get_sj_strategy()));
2456   for (uint i= 0; i < join()->primary_tables; ++i)
2457   {
2458     // Find the sj-mat tmp table whose sj nest contains us:
2459     Semijoin_mat_exec *const sjm= join()->qep_tab[i].sj_mat_exec();
2460     if (sjm &&
2461         (uint)idx() >= sjm->inner_table_index &&
2462         (uint)idx() <  sjm->inner_table_index + sjm->table_count)
2463       return sjm->sj_nest->nested_join->query_block_id;
2464   }
2465   assert(false);
2466   return 0;
2467 }
2468 
2469 
2470 /**
2471   Extend join_tab->cond by AND'ing add_cond to it
2472 
2473   @param add_cond    The condition to AND with the existing cond
2474                      for this JOIN_TAB
2475 
2476   @retval true   if there was a memory allocation error
2477   @retval false  otherwise
2478 */
and_with_condition(Item * add_cond)2479 bool QEP_shared_owner::and_with_condition(Item *add_cond)
2480 {
2481   Item *tmp= condition();
2482   if (and_conditions(&tmp, add_cond))
2483     return true;
2484   set_condition(tmp);
2485   return false;
2486 }
2487 
2488 
2489 /**
2490   Partially cleanup JOIN after it has executed: close index or rnd read
2491   (table cursors), free quick selects.
2492 
2493     This function is called in the end of execution of a JOIN, before the used
2494     tables are unlocked and closed.
2495 
2496     For a join that is resolved using a temporary table, the first sweep is
2497     performed against actual tables and an intermediate result is inserted
2498     into the temprorary table.
2499     The last sweep is performed against the temporary table. Therefore,
2500     the base tables and associated buffers used to fill the temporary table
2501     are no longer needed, and this function is called to free them.
2502 
2503     For a join that is performed without a temporary table, this function
2504     is called after all rows are sent, but before EOF packet is sent.
2505 
2506     For a simple SELECT with no subqueries this function performs a full
2507     cleanup of the JOIN and calls mysql_unlock_read_tables to free used base
2508     tables.
2509 
2510     If a JOIN is executed for a subquery or if it has a subquery, we can't
2511     do the full cleanup and need to do a partial cleanup only.
2512     - If a JOIN is not the top level join, we must not unlock the tables
2513     because the outer select may not have been evaluated yet, and we
2514     can't unlock only selected tables of a query.
2515     - Additionally, if this JOIN corresponds to a correlated subquery, we
2516     should not free quick selects and join buffers because they will be
2517     needed for the next execution of the correlated subquery.
2518     - However, if this is a JOIN for a [sub]select, which is not
2519     a correlated subquery itself, but has subqueries, we can free it
2520     fully and also free JOINs of all its subqueries. The exception
2521     is a subquery in SELECT list, e.g: @n
2522     SELECT a, (select max(b) from t1) group by c @n
2523     This subquery will not be evaluated at first sweep and its value will
2524     not be inserted into the temporary table. Instead, it's evaluated
2525     when selecting from the temporary table. Therefore, it can't be freed
2526     here even though it's not correlated.
2527 
2528   @todo
2529     Unlock tables even if the join isn't top level select in the tree
2530 */
2531 
join_free()2532 void JOIN::join_free()
2533 {
2534   SELECT_LEX_UNIT *tmp_unit;
2535   SELECT_LEX *sl;
2536   /*
2537     Optimization: if not EXPLAIN and we are done with the JOIN,
2538     free all tables.
2539   */
2540   bool full= (!select_lex->uncacheable && !thd->lex->describe);
2541   bool can_unlock= full;
2542   DBUG_ENTER("JOIN::join_free");
2543 
2544   cleanup();
2545 
2546   for (tmp_unit= select_lex->first_inner_unit();
2547        tmp_unit;
2548        tmp_unit= tmp_unit->next_unit())
2549     for (sl= tmp_unit->first_select(); sl; sl= sl->next_select())
2550     {
2551       Item_subselect *subselect= sl->master_unit()->item;
2552       bool full_local= full && (!subselect || subselect->is_evaluated());
2553       /*
2554         If this join is evaluated, we can partially clean it up and clean up
2555         all its underlying joins even if they are correlated, only query plan
2556         is left in case a user will run EXPLAIN FOR CONNECTION.
2557         If this join is not yet evaluated, we still must clean it up to
2558         close its table cursors -- it may never get evaluated, as in case of
2559         ... HAVING FALSE OR a IN (SELECT ...))
2560         but all table cursors must be closed before the unlock.
2561       */
2562       sl->cleanup_all_joins();
2563       /* Can't unlock if at least one JOIN is still needed */
2564       can_unlock= can_unlock && full_local;
2565     }
2566 
2567   /*
2568     We are not using tables anymore
2569     Unlock all tables. We may be in an INSERT .... SELECT statement.
2570   */
2571   if (can_unlock && lock && thd->lock && ! thd->locked_tables_mode &&
2572       !(select_lex->active_options() & SELECT_NO_UNLOCK) &&
2573       !select_lex->subquery_in_having &&
2574       (select_lex == (thd->lex->unit->fake_select_lex ?
2575                       thd->lex->unit->fake_select_lex : thd->lex->select_lex)))
2576   {
2577     /*
2578       TODO: unlock tables even if the join isn't top level select in the
2579       tree.
2580     */
2581     mysql_unlock_read_tables(thd, lock);           // Don't free join->lock
2582     DEBUG_SYNC(thd, "after_join_free_unlock");
2583     lock= 0;
2584   }
2585 
2586   DBUG_VOID_RETURN;
2587 }
2588 
2589 
2590 /**
2591   Free resources of given join.
2592 
2593   @param fill   true if we should free all resources, call with full==1
2594                 should be last, before it this function can be called with
2595                 full==0
2596 
2597   @note
2598     With subquery this function definitely will be called several times,
2599     but even for simple query it can be called several times.
2600 */
2601 
cleanup()2602 void JOIN::cleanup()
2603 {
2604   DBUG_ENTER("JOIN::cleanup");
2605 
2606   assert(const_tables <= primary_tables &&
2607          primary_tables <= tables);
2608 
2609   if (qep_tab || join_tab || best_ref)
2610   {
2611     for (uint i= 0; i < tables; i++)
2612     {
2613       QEP_TAB *qtab;
2614       TABLE *table;
2615       QEP_operation *op;
2616       if (qep_tab)
2617       {
2618         assert(!join_tab);
2619         qtab= &qep_tab[i];
2620         op= qtab->op;
2621         table= qtab->table();
2622       }
2623       else
2624       {
2625         qtab= NULL;
2626         op= NULL;
2627         table= (join_tab ? &join_tab[i] : best_ref[i])->table();
2628       }
2629       if (!table)
2630         continue;
2631       if (table->is_created())
2632       {
2633         table->file->ha_index_or_rnd_end();
2634         if (op && op->type() == QEP_operation::OT_TMP_TABLE)
2635         {
2636           int tmp;
2637           if ((tmp= table->file->extra(HA_EXTRA_NO_CACHE)))
2638             table->file->print_error(tmp, MYF(0));
2639         }
2640       }
2641       free_io_cache(table);
2642       filesort_free_buffers(table, false);
2643     }
2644   }
2645 
2646   /* Restore ref array to original state */
2647   if (current_ref_ptrs != items0)
2648   {
2649     set_items_ref_array(items0);
2650     set_group_rpa= false;
2651   }
2652   DBUG_VOID_RETURN;
2653 }
2654 
2655 
2656 /**
2657   Filter out ORDER items those are equal to constants in WHERE
2658 
2659   This function is a limited version of remove_const() for use
2660   with non-JOIN statements (i.e. single-table UPDATE and DELETE).
2661 
2662 
2663   @param order            Linked list of ORDER BY arguments
2664   @param cond             WHERE expression
2665 
2666   @return pointer to new filtered ORDER list or NULL if whole list eliminated
2667 
2668   @note
2669     This function overwrites input order list.
2670 */
2671 
simple_remove_const(ORDER * order,Item * where)2672 ORDER *simple_remove_const(ORDER *order, Item *where)
2673 {
2674   if (!order || !where)
2675     return order;
2676 
2677   ORDER *first= NULL, *prev= NULL;
2678   for (; order; order= order->next)
2679   {
2680     assert(!order->item[0]->with_sum_func); // should never happen
2681     if (!const_expression_in_where(where, order->item[0]))
2682     {
2683       if (!first)
2684         first= order;
2685       if (prev)
2686         prev->next= order;
2687       prev= order;
2688     }
2689   }
2690   if (prev)
2691     prev->next= NULL;
2692   return first;
2693 }
2694 
2695 
2696 
2697 
2698 /*
2699   Check if equality can be used in removing components of GROUP BY/DISTINCT
2700 
2701   SYNOPSIS
2702     test_if_equality_guarantees_uniqueness()
2703       l          the left comparison argument (a field if any)
2704       r          the right comparison argument (a const if any)
2705 
2706   DESCRIPTION
2707     Checks if an equality predicate can be used to take away
2708     DISTINCT/GROUP BY because it is known to be true for exactly one
2709     distinct value (e.g. <expr> == <const>).
2710     Arguments must be of the same type because e.g.
2711     <string_field> = <int_const> may match more than 1 distinct value from
2712     the column.
2713     We must take into consideration and the optimization done for various
2714     string constants when compared to dates etc (see Item_int_with_ref) as
2715     well as the collation of the arguments.
2716 
2717   RETURN VALUE
2718     TRUE    can be used
2719     FALSE   cannot be used
2720 */
2721 static bool
test_if_equality_guarantees_uniqueness(Item * l,Item * r)2722 test_if_equality_guarantees_uniqueness(Item *l, Item *r)
2723 {
2724   return r->const_item() &&
2725     /* elements must be compared as dates */
2726      (Arg_comparator::can_compare_as_dates(l, r, 0) ||
2727       /* or of the same result type */
2728       (r->result_type() == l->result_type() &&
2729        /* and must have the same collation if compared as strings */
2730        (l->result_type() != STRING_RESULT ||
2731         l->collation.collation == r->collation.collation)));
2732 }
2733 
2734 
2735 /*
2736   Return TRUE if i1 and i2 (if any) are equal items,
2737   or if i1 is a wrapper item around the f2 field.
2738 */
2739 
equal(Item * i1,Item * i2,Field * f2)2740 static bool equal(Item *i1, Item *i2, Field *f2)
2741 {
2742   assert((i2 == NULL) ^ (f2 == NULL));
2743 
2744   if (i2 != NULL)
2745     return i1->eq(i2, 1);
2746   else if (i1->type() == Item::FIELD_ITEM)
2747     return f2->eq(((Item_field *) i1)->field);
2748   else
2749     return FALSE;
2750 }
2751 
2752 
2753 /**
2754   Test if a field or an item is equal to a constant value in WHERE
2755 
2756   @param        cond            WHERE clause expression
2757   @param        comp_item       Item to find in WHERE expression
2758                                 (if comp_field != NULL)
2759   @param        comp_field      Field to find in WHERE expression
2760                                 (if comp_item != NULL)
2761   @param[out]   const_item      intermediate arg, set to Item pointer to NULL
2762 
2763   @return TRUE if the field is a constant value in WHERE
2764 
2765   @note
2766     comp_item and comp_field parameters are mutually exclusive.
2767 */
2768 bool
const_expression_in_where(Item * cond,Item * comp_item,Field * comp_field,Item ** const_item)2769 const_expression_in_where(Item *cond, Item *comp_item, Field *comp_field,
2770                           Item **const_item)
2771 {
2772   assert((comp_item == NULL) ^ (comp_field == NULL));
2773 
2774   Item *intermediate= NULL;
2775   if (const_item == NULL)
2776     const_item= &intermediate;
2777 
2778   if (cond->type() == Item::COND_ITEM)
2779   {
2780     bool and_level= (((Item_cond*) cond)->functype()
2781 		     == Item_func::COND_AND_FUNC);
2782     List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
2783     Item *item;
2784     while ((item=li++))
2785     {
2786       bool res=const_expression_in_where(item, comp_item, comp_field,
2787                                          const_item);
2788       if (res)					// Is a const value
2789       {
2790 	if (and_level)
2791 	  return 1;
2792       }
2793       else if (!and_level)
2794 	return 0;
2795     }
2796     return and_level ? 0 : 1;
2797   }
2798   else if (cond->eq_cmp_result() != Item::COND_OK)
2799   {						// boolean compare function
2800     Item_func* func= (Item_func*) cond;
2801     if (func->functype() != Item_func::EQUAL_FUNC &&
2802 	func->functype() != Item_func::EQ_FUNC)
2803       return 0;
2804     Item *left_item=	((Item_func*) cond)->arguments()[0];
2805     Item *right_item= ((Item_func*) cond)->arguments()[1];
2806     if (equal(left_item, comp_item, comp_field))
2807     {
2808       if (test_if_equality_guarantees_uniqueness (left_item, right_item))
2809       {
2810 	if (*const_item)
2811 	  return right_item->eq(*const_item, 1);
2812 	*const_item=right_item;
2813 	return 1;
2814       }
2815     }
2816     else if (equal(right_item, comp_item, comp_field))
2817     {
2818       if (test_if_equality_guarantees_uniqueness (right_item, left_item))
2819       {
2820 	if (*const_item)
2821 	  return left_item->eq(*const_item, 1);
2822 	*const_item=left_item;
2823 	return 1;
2824       }
2825     }
2826   }
2827   return 0;
2828 }
2829 
2830 
2831 /**
2832   Update TMP_TABLE_PARAM with count of the different type of fields.
2833 
2834   This function counts the number of fields, functions and sum
2835   functions (items with type SUM_FUNC_ITEM) for use by
2836   create_tmp_table() and stores it in the Temp_table_param object. It
2837   also resets and calculates the quick_group property, which may have
2838   to be reverted if this function is called after deciding to use
2839   ROLLUP (see JOIN::optimize_rollup()).
2840 
2841   @param select_lex           SELECT_LEX of query
2842   @param param                Description of temp table
2843   @param fields               List of fields to count
2844   @param reset_with_sum_func  Whether to reset with_sum_func of func items
2845   @param save_sum_fields      Count in the way create_tmp_table() expects when
2846                               given the same parameter.
2847 */
2848 
2849 void
count_field_types(SELECT_LEX * select_lex,Temp_table_param * param,List<Item> & fields,bool reset_with_sum_func,bool save_sum_fields)2850 count_field_types(SELECT_LEX *select_lex, Temp_table_param *param,
2851                   List<Item> &fields, bool reset_with_sum_func,
2852                   bool save_sum_fields)
2853 {
2854   List_iterator<Item> li(fields);
2855   Item *field;
2856 
2857   param->field_count= 0;
2858   param->sum_func_count= 0;
2859   param->func_count= 0;
2860   param->hidden_field_count= 0;
2861   param->outer_sum_func_count= 0;
2862   param->quick_group=1;
2863   /*
2864     Loose index scan guarantees that all grouping is done and MIN/MAX
2865     functions are computed, so create_tmp_table() treats this as if
2866     save_sum_fields is set.
2867   */
2868   save_sum_fields|= param->precomputed_group_by;
2869 
2870   while ((field=li++))
2871   {
2872     Item::Type real_type= field->real_item()->type();
2873     if (real_type == Item::FIELD_ITEM)
2874       param->field_count++;
2875     else if (real_type == Item::SUM_FUNC_ITEM)
2876     {
2877       if (! field->const_item())
2878       {
2879 	Item_sum *sum_item=(Item_sum*) field->real_item();
2880         if (!sum_item->depended_from() ||
2881             sum_item->depended_from() == select_lex)
2882         {
2883           if (!sum_item->quick_group)
2884             param->quick_group=0;			// UDF SUM function
2885           param->sum_func_count++;
2886 
2887           for (uint i=0 ; i < sum_item->get_arg_count() ; i++)
2888           {
2889             if (sum_item->get_arg(i)->real_item()->type() == Item::FIELD_ITEM)
2890               param->field_count++;
2891             else
2892               param->func_count++;
2893           }
2894         }
2895         param->func_count++;
2896       }
2897       else if (save_sum_fields)
2898       {
2899         /*
2900           Count the way create_tmp_table() does if asked to preserve
2901           Item_sum_* functions in fields list.
2902 
2903           Item field is an Item_sum_* or a reference to such an
2904           item. We need to distinguish between these two cases since
2905           they are treated differently by create_tmp_table().
2906         */
2907         if (field->type() == Item::SUM_FUNC_ITEM) // An Item_sum_*
2908           param->field_count++;
2909         else // A reference to an Item_sum_*
2910         {
2911           param->func_count++;
2912           param->sum_func_count++;
2913         }
2914       }
2915     }
2916     else
2917     {
2918       param->func_count++;
2919       if (reset_with_sum_func)
2920 	field->with_sum_func=0;
2921       if (field->with_sum_func)
2922         param->outer_sum_func_count++;
2923     }
2924   }
2925 }
2926 
2927 
2928 /**
2929   Return 1 if second is a subpart of first argument.
2930 
2931   If first parts has different direction, change it to second part
2932   (group is sorted like order)
2933 */
2934 
2935 bool
test_if_subpart(ORDER * a,ORDER * b)2936 test_if_subpart(ORDER *a,ORDER *b)
2937 {
2938   for (; a && b; a=a->next,b=b->next)
2939   {
2940     if ((*a->item)->eq(*b->item,1))
2941       a->direction= b->direction;
2942     else
2943       return 0;
2944   }
2945   return MY_TEST(!b);
2946 }
2947 
2948 /**
2949   calc how big buffer we need for comparing group entries.
2950 */
2951 
2952 void
calc_group_buffer(JOIN * join,ORDER * group)2953 calc_group_buffer(JOIN *join,ORDER *group)
2954 {
2955   uint key_length=0, parts=0, null_parts=0;
2956 
2957   if (group)
2958     join->grouped= true;
2959   for (; group ; group=group->next)
2960   {
2961     Item *group_item= *group->item;
2962     Field *field= group_item->get_tmp_table_field();
2963     if (field)
2964     {
2965       enum_field_types type;
2966       if ((type= field->type()) == MYSQL_TYPE_BLOB)
2967 	key_length+=MAX_BLOB_WIDTH;		// Can't be used as a key
2968       else if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_VAR_STRING)
2969         key_length+= field->field_length + HA_KEY_BLOB_LENGTH;
2970       else if (type == MYSQL_TYPE_BIT)
2971       {
2972         /* Bit is usually stored as a longlong key for group fields */
2973         key_length+= 8;                         // Big enough
2974       }
2975       else
2976 	key_length+= field->pack_length();
2977     }
2978     else
2979     {
2980       switch (group_item->result_type()) {
2981       case REAL_RESULT:
2982         key_length+= sizeof(double);
2983         break;
2984       case INT_RESULT:
2985         key_length+= sizeof(longlong);
2986         break;
2987       case DECIMAL_RESULT:
2988         key_length+= my_decimal_get_binary_size(group_item->max_length -
2989                                                 (group_item->decimals ? 1 : 0),
2990                                                 group_item->decimals);
2991         break;
2992       case STRING_RESULT:
2993       {
2994         /*
2995           As items represented as DATE/TIME fields in the group buffer
2996           have STRING_RESULT result type, we increase the length
2997           by 8 as maximum pack length of such fields.
2998         */
2999         if (group_item->is_temporal())
3000         {
3001           key_length+= 8;
3002         }
3003         else if (group_item->field_type() == MYSQL_TYPE_BLOB)
3004           key_length+= MAX_BLOB_WIDTH;		// Can't be used as a key
3005         else
3006         {
3007           /*
3008             Group strings are taken as varstrings and require an length field.
3009             A field is not yet created by create_tmp_field()
3010             and the sizes should match up.
3011           */
3012           key_length+= group_item->max_length + HA_KEY_BLOB_LENGTH;
3013         }
3014         break;
3015       }
3016       default:
3017         /* This case should never be choosen */
3018         assert(0);
3019         my_error(ER_OUT_OF_RESOURCES, MYF(ME_FATALERROR));
3020       }
3021     }
3022     parts++;
3023     if (group_item->maybe_null)
3024       null_parts++;
3025   }
3026   join->tmp_table_param.group_length=key_length+null_parts;
3027   join->tmp_table_param.group_parts=parts;
3028   join->tmp_table_param.group_null_parts=null_parts;
3029 }
3030 
3031 
3032 
3033 /**
3034   Make an array of pointers to sum_functions to speed up
3035   sum_func calculation.
3036 
3037   @retval
3038     0	ok
3039   @retval
3040     1	Error
3041 */
3042 
alloc_func_list()3043 bool JOIN::alloc_func_list()
3044 {
3045   uint func_count, group_parts;
3046   DBUG_ENTER("alloc_func_list");
3047 
3048   func_count= tmp_table_param.sum_func_count;
3049   /*
3050     If we are using rollup, we need a copy of the summary functions for
3051     each level
3052   */
3053   if (rollup.state != ROLLUP::STATE_NONE)
3054     func_count*= (send_group_parts+1);
3055 
3056   group_parts= send_group_parts;
3057   /*
3058     If distinct, reserve memory for possible
3059     disctinct->group_by optimization
3060   */
3061   if (select_distinct)
3062   {
3063     group_parts+= fields_list.elements;
3064     /*
3065       If the ORDER clause is specified then it's possible that
3066       it also will be optimized, so reserve space for it too
3067     */
3068     if (order)
3069     {
3070       ORDER *ord;
3071       for (ord= order; ord; ord= ord->next)
3072         group_parts++;
3073     }
3074   }
3075 
3076   /* This must use calloc() as rollup_make_fields depends on this */
3077   sum_funcs= (Item_sum**) thd->mem_calloc(sizeof(Item_sum**) * (func_count+1) +
3078 				           sizeof(Item_sum***) * (group_parts+1));
3079   sum_funcs_end= (Item_sum***) (sum_funcs + func_count + 1);
3080   DBUG_RETURN(sum_funcs == 0);
3081 }
3082 
3083 
3084 /**
3085   Initialize 'sum_funcs' array with all Item_sum objects.
3086 
3087   @param field_list        All items
3088   @param send_result_set_metadata       Items in select list
3089   @param before_group_by   Set to 1 if this is called before GROUP BY handling
3090   @param recompute         Set to TRUE if sum_funcs must be recomputed
3091 
3092   @retval
3093     0  ok
3094   @retval
3095     1  error
3096 */
3097 
make_sum_func_list(List<Item> & field_list,List<Item> & send_result_set_metadata,bool before_group_by,bool recompute)3098 bool JOIN::make_sum_func_list(List<Item> &field_list,
3099                               List<Item> &send_result_set_metadata,
3100 			      bool before_group_by, bool recompute)
3101 {
3102   List_iterator_fast<Item> it(field_list);
3103   Item_sum **func;
3104   Item *item;
3105   DBUG_ENTER("make_sum_func_list");
3106 
3107   if (*sum_funcs && !recompute)
3108     DBUG_RETURN(FALSE); /* We have already initialized sum_funcs. */
3109 
3110   func= sum_funcs;
3111   while ((item=it++))
3112   {
3113     if (item->type() == Item::SUM_FUNC_ITEM && !item->const_item() &&
3114         (!((Item_sum*) item)->depended_from() ||
3115          ((Item_sum *)item)->depended_from() == select_lex))
3116       *func++= (Item_sum*) item;
3117   }
3118   if (before_group_by && rollup.state == ROLLUP::STATE_INITED)
3119   {
3120     rollup.state= ROLLUP::STATE_READY;
3121     if (rollup_make_fields(field_list, send_result_set_metadata, &func))
3122       DBUG_RETURN(TRUE);			// Should never happen
3123   }
3124   else if (rollup.state == ROLLUP::STATE_NONE)
3125   {
3126     for (uint i=0 ; i <= send_group_parts ;i++)
3127       sum_funcs_end[i]= func;
3128   }
3129   else if (rollup.state == ROLLUP::STATE_READY)
3130     DBUG_RETURN(FALSE);                         // Don't put end marker
3131   *func=0;					// End marker
3132   DBUG_RETURN(FALSE);
3133 }
3134 
3135 
3136 /**
3137   Free joins of subselect of this select.
3138 
3139   @param thd      THD pointer
3140   @param select   pointer to st_select_lex which subselects joins we will free
3141 */
3142 
free_underlaid_joins(THD * thd,SELECT_LEX * select)3143 void free_underlaid_joins(THD *thd, SELECT_LEX *select)
3144 {
3145   for (SELECT_LEX_UNIT *unit= select->first_inner_unit();
3146        unit;
3147        unit= unit->next_unit())
3148     unit->cleanup(false);
3149 }
3150 
3151 /****************************************************************************
3152   ROLLUP handling
3153 ****************************************************************************/
3154 
3155 /**
3156    Wrap all constant Items in GROUP BY list.
3157 
3158    For ROLLUP queries each constant item referenced in GROUP BY list
3159    is wrapped up into an Item_func object yielding the same value
3160    as the constant item. The objects of the wrapper class are never
3161    considered as constant items and besides they inherit all
3162    properties of the Item_result_field class.
3163    This wrapping allows us to ensure writing constant items
3164    into temporary tables whenever the result of the ROLLUP
3165    operation has to be written into a temporary table, e.g. when
3166    ROLLUP is used together with DISTINCT in the SELECT list.
3167    Usually when creating temporary tables for an intermediate
3168    result we do not include fields for constant expressions.
3169 
3170    @retval
3171      0  if ok
3172    @retval
3173      1  on error
3174 */
3175 
rollup_process_const_fields()3176 bool JOIN::rollup_process_const_fields()
3177 {
3178   ORDER *group_tmp;
3179   Item *item;
3180   List_iterator<Item> it(all_fields);
3181 
3182   for (group_tmp= group_list; group_tmp; group_tmp= group_tmp->next)
3183   {
3184     if (!(*group_tmp->item)->const_item())
3185       continue;
3186     while ((item= it++))
3187     {
3188       if (*group_tmp->item == item)
3189       {
3190         Item* new_item= new Item_func_rollup_const(item);
3191         if (!new_item)
3192           return 1;
3193         new_item->fix_fields(thd, (Item **) 0);
3194         thd->change_item_tree(it.ref(), new_item);
3195         for (ORDER *tmp= group_tmp; tmp; tmp= tmp->next)
3196         {
3197           if (*tmp->item == item)
3198             thd->change_item_tree(tmp->item, new_item);
3199         }
3200         break;
3201       }
3202     }
3203     it.rewind();
3204   }
3205   return 0;
3206 }
3207 
3208 
3209 /**
3210   Fill up rollup structures with pointers to fields to use.
3211 
3212   Creates copies of item_sum items for each sum level.
3213 
3214   @param fields_arg		List of all fields (hidden and real ones)
3215   @param sel_fields		Pointer to selected fields
3216   @param func			Store here a pointer to all fields
3217 
3218   @retval
3219     0	if ok;
3220     In this case func is pointing to next not used element.
3221   @retval
3222     1    on error
3223 */
3224 
bool JOIN::rollup_make_fields(List<Item> &fields_arg, List<Item> &sel_fields,
			      Item_sum ***func)
{
  List_iterator_fast<Item> it(fields_arg);
  // First selected (non-hidden) field: marks the boundary between hidden
  // fields (stored at the end of the ref array) and real ones (at the start).
  Item *first_field= sel_fields.head();
  uint level;

  /*
    Create field lists for the different levels

    The idea here is to have a separate field list for each rollup level to
    avoid all runtime checks of which columns should be NULL.

    The list is stored in reverse order to get sum function in such an order
    in func that it makes it easy to reset them with init_sum_functions()

    Assuming:  SELECT a, b, c SUM(b) FROM t1 GROUP BY a,b WITH ROLLUP

    rollup.fields[0] will contain list where a,b,c is NULL
    rollup.fields[1] will contain list where b,c is NULL
    ...
    rollup.ref_pointer_array[#] points to fields for rollup.fields[#]
    ...
    sum_funcs_end[0] points to all sum functions
    sum_funcs_end[1] points to all sum functions, except grand totals
    ...
  */

  for (level=0 ; level < send_group_parts ; level++)
  {
    uint i;
    // Levels are processed in reverse: pos counts down from the innermost.
    uint pos= send_group_parts - level -1;
    bool real_fields= 0;
    Item *item;
    List_iterator<Item> new_it(rollup.fields[pos]);
    Ref_ptr_array ref_array_start= rollup.ref_pointer_arrays[pos];
    ORDER *start_group;

    /* Point to first hidden field */
    uint ref_array_ix= fields_arg.elements-1;

    /* Remember where the sum functions ends for the previous level */
    sum_funcs_end[pos+1]= *func;

    /* Find the start of the group for this level */
    for (i= 0, start_group= group_list ;
	 i++ < pos ;
	 start_group= start_group->next)
      ;

    it.rewind();
    while ((item= it++))
    {
      if (item == first_field)
      {
	real_fields= 1;				// End of hidden fields
        ref_array_ix= 0;
      }

      // Top-level aggregate of this query block?
      if (item->type() == Item::SUM_FUNC_ITEM && !item->const_item() &&
          (!((Item_sum*) item)->depended_from() ||
           ((Item_sum *)item)->depended_from() == select_lex))

      {
	/*
	  This is a top level summary function that must be replaced with
	  a sum function that is reset for this level.

	  NOTE: This code creates an object which is not that nice in a
	  sub select.  Fortunately it's not common to have rollup in
	  sub selects.
	*/
	item= item->copy_or_same(thd);
	((Item_sum*) item)->make_unique();
	*(*func)= (Item_sum*) item;
	(*func)++;
      }
      else
      {
	/* Check if this is something that is part of this group by */
	ORDER *group_tmp;
	for (group_tmp= start_group, i= pos ;
             group_tmp ; group_tmp= group_tmp->next, i++)
	{
          if (*group_tmp->item == item)
	  {
	    /*
	      This is an element that is used by the GROUP BY and should be
	      set to NULL in this level
	    */
            Item_null_result *null_item=
              new (thd->mem_root) Item_null_result(item->field_type(),
                                                   item->result_type());
            if (!null_item)
              return 1;
	    item->maybe_null= 1;		// Value will be null sometimes
            null_item->result_field= item->get_tmp_table_field();
            item= null_item;
	    break;
	  }
	}
      }
      // Hidden fields fill the ref array downward from the end; real fields
      // fill it upward from index 0 and also replace the corresponding slot
      // in rollup.fields[pos].
      ref_array_start[ref_array_ix]= item;
      if (real_fields)
      {
	(void) new_it++;			// Point to next item
	new_it.replace(item);			// Replace previous
	ref_array_ix++;
      }
      else
	ref_array_ix--;
    }
  }
  sum_funcs_end[0]= *func;			// Point to last function
  return 0;
}
3341 
3342 
3343 /**
3344   clear results if there are not rows found for group
3345   (end_send_group/end_write_group)
3346   @retval
3347     FALSE if OK
3348   @retval
3349     TRUE on error
3350 */
3351 
clear()3352 bool JOIN::clear()
3353 {
3354   /*
3355     must clear only the non-const tables, as const tables
3356     are not re-calculated.
3357   */
3358   for (uint tableno= const_tables; tableno < primary_tables; tableno++)
3359     qep_tab[tableno].table()->set_null_row();  // All fields are NULL
3360 
3361   if (copy_fields(&tmp_table_param, thd))
3362     return true;
3363 
3364   if (sum_funcs)
3365   {
3366     Item_sum *func, **func_ptr= sum_funcs;
3367     while ((func= *(func_ptr++)))
3368       func->clear();
3369   }
3370   return false;
3371 }
3372 
3373 
3374 /**
3375   Change the Query_result object of the query block.
3376 
3377   If old_result is not used, forward the call to the current
3378   Query_result in case it is a wrapper around old_result.
3379 
3380   Call prepare() and prepare2() on the new Query_result if we decide
3381   to use it.
3382 
3383   @param new_result New Query_result object
3384   @param old_result Old Query_result object (NULL to force change)
3385 
3386   @retval false Success
3387   @retval true  Error
3388 */
3389 
change_query_result(Query_result_interceptor * new_result,Query_result_interceptor * old_result)3390 bool SELECT_LEX::change_query_result(Query_result_interceptor *new_result,
3391                                      Query_result_interceptor *old_result)
3392 {
3393   DBUG_ENTER("SELECT_LEX::change_query_result");
3394   if (old_result == NULL || query_result() == old_result)
3395   {
3396     set_query_result(new_result);
3397     if (query_result()->prepare(fields_list, master_unit()) ||
3398         query_result()->prepare2())
3399       DBUG_RETURN(true); /* purecov: inspected */
3400     DBUG_RETURN(false);
3401   }
3402   else
3403   {
3404     const bool ret= query_result()->change_query_result(new_result);
3405     DBUG_RETURN(ret);
3406   }
3407 }
3408 
3409 /**
3410   Add having condition as a filter condition, which is applied when reading
3411   from the temp table.
3412 
3413   @param    curr_tmp_table  Table number to which having conds are added.
3414   @returns  false if success, true if error.
3415 */
3416 
bool JOIN::add_having_as_tmp_table_cond(uint curr_tmp_table)
{
  /*
    Fields in the HAVING clause may have been replaced with fields of an
    internal temporary table earlier in planning, so recompute the
    used-tables maps before splitting the condition.
  */
  having_cond->update_used_tables();
  QEP_TAB *const curr_table= &qep_tab[curr_tmp_table];
  table_map used_tables;
  Opt_trace_context *const trace= &thd->opt_trace;

  DBUG_ENTER("JOIN::add_having_as_tmp_table_cond");

  if (curr_table->table_ref)
    used_tables= curr_table->table_ref->map();
  else
  {
    /*
      Pushing parts of HAVING to an internal temporary table.
      Fields in HAVING condition may have been replaced with fields in an
      internal temporary table. This table has map=1, hence we check that
      we have no fields from other tables (outer references are fine).
      Unfortunately, update_used_tables() is not reliable for subquery
      items, which could thus still have other tables in their
      used_tables() information.
    */
    assert(having_cond->has_subquery() ||
           !(having_cond->used_tables() & ~(1 | PSEUDO_TABLE_BITS)));
    used_tables= 1;
  }

  /*
    All conditions which can be applied after reading from used_tables are
    added as filter conditions of curr_tmp_table. If a condition's tables are
    not read yet (for example a subquery in HAVING), then it is kept as-is
    in the original having_cond of the join.
  */
  Item* sort_table_cond= make_cond_for_table(having_cond, used_tables,
                                             (table_map) 0, false);
  if (sort_table_cond)
  {
    if (!curr_table->condition())
      curr_table->set_condition(sort_table_cond);
    else
    {
      // AND the pushed-down HAVING part with the existing filter condition.
      curr_table->set_condition(new Item_cond_and(curr_table->condition(),
                                                  sort_table_cond));
      // Only the newly created AND item needs resolving; its arguments
      // are already fixed.
      if (curr_table->condition()->fix_fields(thd, 0))
        DBUG_RETURN(true);
    }
    // Mark as a top-level boolean condition (UNKNOWN treated as FALSE).
    curr_table->condition()->top_level_item();
    DBUG_EXECUTE("where",print_where(curr_table->condition(),
                                     "select and having",
                                     QT_ORDINARY););

    // What remains in having_cond is the part that could not be pushed down.
    having_cond= make_cond_for_table(having_cond, ~ (table_map) 0,
                                     ~used_tables, false);
    DBUG_EXECUTE("where",
                 print_where(having_cond, "having after sort",
                 QT_ORDINARY););

    Opt_trace_object trace_wrapper(trace);
    Opt_trace_object(trace, "sort_using_internal_table")
                .add("condition_for_sort", sort_table_cond)
                .add("having_after_sort", having_cond);
  }

  DBUG_RETURN(false);
}
3482 
3483 
3484 /**
3485   Init tmp tables usage info.
3486 
3487   @details
3488   This function finalizes execution plan by taking following actions:
3489     .) tmp tables are created, but not instantiated (this is done during
3490        execution). JOIN_TABs dedicated to tmp tables are filled appropriately.
3491        see JOIN::create_intermediate_table.
3492     .) prepare fields lists (fields, all_fields, ref_pointer_array slices) for
3493        each required stage of execution. These fields lists are set for
3494        tmp tables' tabs and for the tab of last table in the join.
3495     .) fill info for sorting/grouping/dups removal is prepared and saved to
3496        appropriate tabs. Here is an example:
3497         SELECT * from t1,t2 WHERE ... GROUP BY t1.f1 ORDER BY t2.f2, t1.f2
3498         and lets assume that the table order in the plan is t1,t2.
       In this case the optimizer will sort for GROUP BY only the first table,
       as the second one isn't mentioned in GROUP BY. The result will be
       materialized in a tmp table.  As filesort can't sort a join, the
       optimizer will sort the tmp table too. The first sorting (for grouping)
       is called simple as it doesn't require a tmp table.  The Filesort
       object for it is created in JOIN::create_intermediate_table.  Filesort
       for the second case is created here, in JOIN::make_tmp_tables_info.
3506 
3507   @note
3508   This function may change tmp_table_param.precomputed_group_by. This
3509   affects how create_tmp_table() treats aggregation functions, so
3510   count_field_types() must be called again to make sure this is taken
3511   into consideration.
3512 
3513   @returns
3514   false - Ok
3515   true  - Error
3516 */
3517 
bool JOIN::make_tmp_tables_info()
{
  assert(!join_tab);
  // Start from the original field lists; these pointers are redirected to
  // the tmp-table versions (tmp_fields_list1/2/3) as each stage is added.
  List<Item> *curr_all_fields= &all_fields;
  List<Item> *curr_fields_list= &fields_list;
  bool materialize_join= false;
  uint curr_tmp_table= const_tables;
  TABLE *exec_tmp_table= NULL;
  DBUG_ENTER("JOIN::make_tmp_tables_info");

  /*
    In this function, we may change having_cond into a condition on a
    temporary sort/group table, so we have to assign having_for_explain now:
  */
  having_for_explain= having_cond;

  // 'grouped' is cleared while the stages are set up; restored at the end.
  const bool has_group_by= this->grouped;

  /*
    Setup last table to provide fields and all_fields lists to the next
    node in the plan.
  */
  if (qep_tab)
  {
    qep_tab[primary_tables - 1].fields= &fields_list;
    qep_tab[primary_tables - 1].all_fields= &all_fields;
  }
  /*
    The loose index scan access method guarantees that all grouping or
    duplicate row elimination (for distinct) is already performed
    during data retrieval, and that all MIN/MAX functions are already
    computed for each group. Thus all MIN/MAX functions should be
    treated as regular functions, and there is no need to perform
    grouping in the main execution loop.
    Notice that currently loose index scan is applicable only for
    single table queries, thus it is sufficient to test only the first
    join_tab element of the plan for its access method.
  */
  if (qep_tab && qep_tab[0].quick() && qep_tab[0].quick()->is_loose_index_scan())
    tmp_table_param.precomputed_group_by=
      !qep_tab[0].quick()->is_agg_loose_index_scan();

  /* Create a tmp table if distinct or if the sort is too complicated */
  if (need_tmp)
  {
    curr_tmp_table= primary_tables;
    tmp_tables++;
    if (plan_is_const())
      first_select= sub_select_op;

    /*
      Create temporary table on first execution of this join.
      (Will be reused if this is a subquery that is executed several times.)
      Don't use tmp table grouping for json aggregate funcs as it's
      very ineffective.
    */
    init_items_ref_array();

    ORDER_with_src tmp_group;
    if (!simple_group && !(test_flags & TEST_NO_KEY_GROUP) && !with_json_agg)
      tmp_group= group_list;

    // Hidden fields are those in all_fields but not in the SELECT list.
    tmp_table_param.hidden_field_count=
      all_fields.elements - fields_list.elements;

    if (create_intermediate_table(&qep_tab[curr_tmp_table],
                                  &all_fields, tmp_group,
                                  group_list && simple_group))
      DBUG_RETURN(true);
    exec_tmp_table= qep_tab[curr_tmp_table].table();

    if (exec_tmp_table->distinct)
      optimize_distinct();

    /*
      If there is no sorting or grouping, 'use_order'
      index result should not have been requested.
      Exception: LooseScan strategy for semijoin requires
      sorted access even if final result is not to be sorted.
    */
    assert(
           !(ordered_index_usage == ordered_index_void &&
             !plan_is_const() &&
             qep_tab[const_tables].position()->sj_strategy != SJ_OPT_LOOSE_SCAN &&
             qep_tab[const_tables].use_order()));

    /* Change sum_fields reference to calculated fields in tmp_table */
    assert(items1.is_null());
    items1= select_lex->ref_ptr_array_slice(2);
    if (sort_and_group || qep_tab[curr_tmp_table].table()->group ||
        tmp_table_param.precomputed_group_by)
    {
      // Aggregated values are materialized in the tmp table: make the
      // field lists point at the tmp table's fields.
      if (change_to_use_tmp_fields(thd, items1,
                                   tmp_fields_list1, tmp_all_fields1,
                                   fields_list.elements, all_fields))
        DBUG_RETURN(true);
    }
    else
    {
      // Aggregation happens later: only redirect item references.
      if (change_refs_to_tmp_fields(thd, items1,
                                    tmp_fields_list1, tmp_all_fields1,
                                    fields_list.elements, all_fields))
        DBUG_RETURN(true);
    }
    curr_all_fields= &tmp_all_fields1;
    curr_fields_list= &tmp_fields_list1;
    // Need to set them now for correct group_fields setup, reset at the end.
    set_items_ref_array(items1);
    qep_tab[curr_tmp_table].ref_array= &items1;
    qep_tab[curr_tmp_table].all_fields= &tmp_all_fields1;
    qep_tab[curr_tmp_table].fields= &tmp_fields_list1;
    setup_tmptable_write_func(&qep_tab[curr_tmp_table]);

    /*
      If having is not handled here, it will be checked before the row is sent
      to the client.
    */
    if (having_cond &&
        (sort_and_group || (exec_tmp_table->distinct && !group_list)))
    {
      /*
        If there is no select distinct then move the having to table conds of
        tmp table.
        NOTE : We cannot apply having after distinct. If columns of having are
               not part of select distinct, then distinct may remove rows
               which can satisfy having.
      */
      if (!select_distinct && add_having_as_tmp_table_cond(curr_tmp_table))
        DBUG_RETURN(true);

      /*
        Having condition which we are not able to add as tmp table conds are
        kept as before. And, this will be applied before storing the rows in
        tmp table.
      */
      qep_tab[curr_tmp_table].having= having_cond;
      having_cond= NULL; // Already done
    }

    /*
      NOTE(review): func_count is zeroed before being added to field_count,
      which makes the addition a no-op; the intended order may have been the
      reverse. Kept as-is to preserve behavior — verify against upstream.
    */
    tmp_table_param.func_count= 0;
    tmp_table_param.field_count+= tmp_table_param.func_count;
    if (sort_and_group || qep_tab[curr_tmp_table].table()->group)
    {
      tmp_table_param.field_count+= tmp_table_param.sum_func_count;
      tmp_table_param.sum_func_count= 0;
    }

    if (exec_tmp_table->group)
    {                                           // Already grouped
      if (!order && !no_order && !skip_sort_order)
        order= group_list;  /* order by group */
      group_list= NULL;
    }
    /*
      If we have different sort & group then we must sort the data by group
      and copy it to another tmp table.
      This code is also used if we are using distinct something
      we haven't been able to store in the temporary table yet
      like SEC_TO_TIME(SUM(...)).
    */

    if ((group_list &&
         (!test_if_subpart(group_list, order) || select_distinct)) ||
        (select_distinct && tmp_table_param.using_outer_summary_function))
    {                                   /* Must copy to another table */
      DBUG_PRINT("info",("Creating group table"));

      // Recompute group buffer sizes and field counts for the second
      // tmp table, based on the first tmp table's field lists.
      calc_group_buffer(this, group_list);
      count_field_types(select_lex, &tmp_table_param, tmp_all_fields1,
                        select_distinct && !group_list, false);
      tmp_table_param.hidden_field_count=
        tmp_all_fields1.elements - tmp_fields_list1.elements;
      sort_and_group= false;
      if (!exec_tmp_table->group && !exec_tmp_table->distinct)
      {
        // 1st tmp table were materializing join result
        materialize_join= true;
        explain_flags.set(ESC_BUFFER_RESULT, ESP_USING_TMPTABLE);
      }
      curr_tmp_table++;
      tmp_tables++;

      /* group data to new table */
      /*
        If the access method is loose index scan then all MIN/MAX
        functions are precomputed, and should be treated as regular
        functions. See extended comment above.
      */
      if (qep_tab[0].quick() && qep_tab[0].quick()->is_loose_index_scan())
        tmp_table_param.precomputed_group_by= TRUE;

      tmp_table_param.hidden_field_count=
        curr_all_fields->elements - curr_fields_list->elements;
      ORDER_with_src dummy= NULL; //TODO can use table->group here also

      if (create_intermediate_table(&qep_tab[curr_tmp_table],
                                    curr_all_fields, dummy, true))
        DBUG_RETURN(true);

      if (group_list)
      {
        explain_flags.set(group_list.src, ESP_USING_TMPTABLE);
        if (!plan_is_const())        // No need to sort a single row
        {
          // Sort the first tmp table on the grouping columns so rows of
          // one group arrive contiguously at the second tmp table.
          if (add_sorting_to_table(curr_tmp_table - 1, &group_list))
            DBUG_RETURN(true);
        }

        if (make_group_fields(this, this))
          DBUG_RETURN(true);
      }

      // Setup sum funcs only when necessary, otherwise we might break info
      // for the first table
      if (group_list || tmp_table_param.sum_func_count)
      {
        if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true, true))
          DBUG_RETURN(true);
        const bool need_distinct=
          !(qep_tab[0].quick() && qep_tab[0].quick()->is_agg_loose_index_scan());
        if (prepare_sum_aggregators(sum_funcs, need_distinct))
          DBUG_RETURN(true);
        group_list= NULL;
        if (setup_sum_funcs(thd, sum_funcs))
          DBUG_RETURN(true);
      }
      // No sum funcs anymore
      assert(items2.is_null());

      items2= select_lex->ref_ptr_array_slice(3);
      if (change_to_use_tmp_fields(thd, items2,
                                   tmp_fields_list2, tmp_all_fields2,
                                   fields_list.elements, tmp_all_fields1))
        DBUG_RETURN(true);

      curr_fields_list= &tmp_fields_list2;
      curr_all_fields= &tmp_all_fields2;
      set_items_ref_array(items2);
      qep_tab[curr_tmp_table].ref_array= &items2;
      qep_tab[curr_tmp_table].all_fields= &tmp_all_fields2;
      qep_tab[curr_tmp_table].fields= &tmp_fields_list2;
      setup_tmptable_write_func(&qep_tab[curr_tmp_table]);

      tmp_table_param.field_count+= tmp_table_param.sum_func_count;
      tmp_table_param.sum_func_count= 0;
    }
    if (qep_tab[curr_tmp_table].table()->distinct)
      select_distinct= false;               /* Each row is unique */

    if (select_distinct && !group_list)
    {
      // Remaining DISTINCT is handled as duplicate removal on the tmp table.
      if (having_cond)
      {
        qep_tab[curr_tmp_table].having= having_cond;
        having_cond->update_used_tables();
        having_cond= NULL;
      }
      qep_tab[curr_tmp_table].distinct= true;
      explain_flags.set(ESC_DISTINCT, ESP_DUPS_REMOVAL);
      select_distinct= false;
    }
    /* Clean tmp_table_param for the next tmp table. */
    tmp_table_param.field_count= tmp_table_param.sum_func_count=
      tmp_table_param.func_count= 0;

    tmp_table_param.copy_field= tmp_table_param.copy_field_end=0;
    first_record= sort_and_group=0;

    if (!group_optimized_away)
    {
      grouped= false;
    }
    else
    {
      /*
        If grouping has been optimized away, a temporary table is
        normally not needed unless we're explicitly requested to create
        one (e.g. due to a SQL_BUFFER_RESULT hint or INSERT ... SELECT).

        In this case (grouping was optimized away), temp_table was
        created without a grouping expression and JOIN::exec() will not
        perform the necessary grouping (by the use of end_send_group()
        or end_write_group()) if JOIN::group is set to false.
      */
      // the temporary table was explicitly requested
      assert(select_lex->active_options() & OPTION_BUFFER_RESULT);
      // the temporary table does not have a grouping expression
      assert(!qep_tab[curr_tmp_table].table()->group);
    }
    calc_group_buffer(this, group_list);
    count_field_types(select_lex, &tmp_table_param, *curr_all_fields, false,
                      false);
  }

  if (grouped || implicit_grouping || tmp_table_param.sum_func_count)
  {
    // Final aggregation stage: group fields plus a last ref-array slice.
    if (make_group_fields(this, this))
      DBUG_RETURN(true);

    assert(items3.is_null());

    if (items0.is_null())
      init_items_ref_array();
    items3= select_lex->ref_ptr_array_slice(4);
    /*
      NOTE(review): the return value of setup_copy_fields() is ignored here;
      errors appear to be caught by the thd->is_fatal_error test after
      setup_sum_funcs() below — confirm against upstream.
    */
    setup_copy_fields(thd, &tmp_table_param,
                      items3, tmp_fields_list3, tmp_all_fields3,
                      curr_fields_list->elements, *curr_all_fields);

    curr_fields_list= &tmp_fields_list3;
    curr_all_fields= &tmp_all_fields3;
    set_items_ref_array(items3);
    if (qep_tab)
    {
      // Set grouped fields on the last table
      qep_tab[primary_tables + tmp_tables - 1].ref_array= &items3;
      qep_tab[primary_tables + tmp_tables - 1].all_fields= &tmp_all_fields3;
      qep_tab[primary_tables + tmp_tables - 1].fields= &tmp_fields_list3;
    }
    if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true, true))
      DBUG_RETURN(true);
    const bool need_distinct=
      !(qep_tab && qep_tab[0].quick() && qep_tab[0].quick()->is_agg_loose_index_scan());
    if (prepare_sum_aggregators(sum_funcs, need_distinct))
      DBUG_RETURN(true);
    if (setup_sum_funcs(thd, sum_funcs) || thd->is_fatal_error)
      DBUG_RETURN(true);
  }
  if (qep_tab && (group_list || order))
  {
    ASSERT_BEST_REF_IN_JOIN_ORDER(this);
    DBUG_PRINT("info",("Sorting for send_result_set_metadata"));
    THD_STAGE_INFO(thd, stage_sorting_result);
    /* If we have already done the group, add HAVING to sorted table */
    if (having_cond && !group_list && !sort_and_group)
    {
      if (add_having_as_tmp_table_cond(curr_tmp_table))
        DBUG_RETURN(true);
    }

    if (grouped)
      m_select_limit= HA_POS_ERROR;
    else if (!need_tmp)
    {
      /*
        We can abort sorting after thd->select_limit rows if there are no
        filter conditions for any tables after the sorted one.
        Filter conditions come in several forms:
         1. as a condition item attached to the join_tab, or
         2. as a keyuse attached to the join_tab (ref access).
      */
      for (uint i= const_tables + 1; i < primary_tables; i++)
      {
        QEP_TAB *const tab= qep_tab + i;
        if (tab->condition() ||                 // 1
            (best_ref[tab->idx()]->keyuse() &&
             tab->first_inner() == NO_PLAN_IDX))    // 2
        {
          /* We have to sort all rows */
          m_select_limit= HA_POS_ERROR;
          break;
        }
      }
    }
    /*
      Here we add sorting stage for ORDER BY/GROUP BY clause, if the
      optimiser chose FILESORT to be faster than INDEX SCAN or there is
      no suitable index present.
      OPTION_FOUND_ROWS supersedes LIMIT and is taken into account.
    */
    DBUG_PRINT("info",("Sorting for order by/group by"));
    ORDER_with_src order_arg= group_list ? group_list : order;
    if (qep_tab &&
        ordered_index_usage !=
        (group_list ? ordered_index_group_by : ordered_index_order_by) &&
        qep_tab[curr_tmp_table].type() != JT_CONST &&
        qep_tab[curr_tmp_table].type() != JT_EQ_REF) // Don't sort 1 row
    {
      // Sort either first non-const table or the last tmp table
      QEP_TAB *const sort_tab= &qep_tab[curr_tmp_table];
      if (need_tmp && !materialize_join && !exec_tmp_table->group)
        explain_flags.set(order_arg.src, ESP_USING_TMPTABLE);

      if (add_sorting_to_table(curr_tmp_table, &order_arg))
        DBUG_RETURN(true);
      /*
        filesort_limit:  Return only this many rows from filesort().
        We can use select_limit_cnt only if we have no group_by and 1 table.
        This allows us to use Bounded_queue for queries like:
          "select SQL_CALC_FOUND_ROWS * from t1 order by b desc limit 1;"
        m_select_limit == HA_POS_ERROR (we need a full table scan)
        unit->select_limit_cnt == 1 (we only need one row in the result set)
      */
      sort_tab->filesort->limit=
        (has_group_by || (primary_tables > curr_tmp_table + 1)) ?
         m_select_limit : unit->select_limit_cnt;
    }
    if (!plan_is_const() &&
        !qep_tab[const_tables].table()->sort.io_cache)
    {
      /*
        If no IO cache exists for the first table then we are using an
        INDEX SCAN and no filesort. Thus we should not remove the sorted
        attribute on the INDEX SCAN.
      */
      skip_sort_order= true;
    }
  }
  fields= curr_fields_list;
  // Reset before execution
  set_items_ref_array(items0);
  if (qep_tab)
  {
    qep_tab[primary_tables + tmp_tables - 1].next_select=
      get_end_select_func();
  }
  // Restore the grouped flag that was cleared during staging above.
  grouped= has_group_by;

  unplug_join_tabs();

  DBUG_RETURN(false);
}
3939 
unplug_join_tabs()3940 void JOIN::unplug_join_tabs()
3941 {
3942   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
3943   for (uint i= 0; i < tables; ++i)
3944     best_ref[i]->cleanup();
3945 
3946   best_ref= NULL;
3947 }
3948 
/**
  @brief Add a Filesort object to the given table, to sort it with filesort

  @param idx        index (in the qep_tab array) of the QEP_TAB to attach
                    the created Filesort object to
  @param sort_order List of expressions to sort the table by

  @note May replace a ref access on a const with a quick range scan so that
        filesort can read the rows

  @return false on success, true on OOM
*/
3959 
bool
JOIN::add_sorting_to_table(uint idx, ORDER_with_src *sort_order)
{
  ASSERT_BEST_REF_IN_JOIN_ORDER(this);
  explain_flags.set(sort_order->src, ESP_USING_FILESORT);
  QEP_TAB *const tab= &qep_tab[idx];
  // Create with no row limit; callers adjust filesort->limit afterwards
  // when LIMIT optimization applies.
  tab->filesort=
    new (thd->mem_root) Filesort(tab, *sort_order, HA_POS_ERROR);
  if (!tab->filesort)
    return true;
  {
    if (tab->ref().key >= 0)
    {
      TABLE *table= tab->table();
      if (tab->quick())
      {
        /*
          We can only use 'Only index' if quick key is same as ref_key
          and in index_merge 'Only index' cannot be used
        */
        if (((uint) tab->ref().key != tab->quick()->index))
          table->set_keyread(FALSE);
      }
      else
      {
        /*
          We have a ref on a const;  Change this to a range that filesort
          can use.
          For impossible ranges (like when doing a lookup on NULL on a NOT NULL
          field), quick will contain an empty record set.

          @TODO This should be either indicated as range or filesort
          should start using ref directly, without switching to quick.
        */
        JOIN_TAB *const jtab= best_ref[idx];
        QUICK_SELECT_I *q= tab->type() == JT_FT ?
          get_ft_select(thd, table, tab->ref().key) :
          get_quick_select_for_ref(thd, table, &tab->ref(),
                                   jtab->found_records);
        if (!q)
          return true; /* purecov: inspected */
        tab->set_quick(q); // We keep it displayed as "ref".
      }
    }
  }
  // Rows are fetched through join_init_read_record when the plan executes.
  tab->read_first_record= join_init_read_record;
  return false;
}
4008 
4009 /**
4010   Find a cheaper access key than a given @a key
4011 
4012   @param          tab                 NULL or JOIN_TAB of the accessed table
4013   @param          order               Linked list of ORDER BY arguments
4014   @param          table               Table if tab == NULL or tab->table()
4015   @param          usable_keys         Key map to find a cheaper key in
4016   @param          ref_key
4017                 * 0 <= key < MAX_KEY   - key number (hint) to start the search
4018                 * -1                   - no key number provided
4019   @param          select_limit        LIMIT value, or HA_POS_ERROR if no limit
4020   @param [out]    new_key             Key number if success, otherwise undefined
4021   @param [out]    new_key_direction   Return -1 (reverse) or +1 if success,
4022                                       otherwise undefined
4023   @param [out]    new_select_limit    Return adjusted LIMIT
4024   @param [out]    new_used_key_parts  NULL by default, otherwise return number
4025                                       of new_key prefix columns if success
4026                                       or undefined if the function fails
4027   @param [out]  saved_best_key_parts  NULL by default, otherwise preserve the
4028                                       value for further use in QUICK_SELECT_DESC
4029 
4030   @note
4031     This function takes into account table->quick_condition_rows statistic
4032     (that is calculated by JOIN::make_join_plan()).
4033     However, single table procedures such as mysql_update() and mysql_delete()
4034     never call JOIN::make_join_plan(), so they have to update it manually
4035     (@see get_index_for_order()).
4036 */
4037 
4038 bool
test_if_cheaper_ordering(const JOIN_TAB * tab,ORDER * order,TABLE * table,key_map usable_keys,int ref_key,ha_rows select_limit,int * new_key,int * new_key_direction,ha_rows * new_select_limit,uint * new_used_key_parts,uint * saved_best_key_parts)4039 test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
4040                          key_map usable_keys,  int ref_key,
4041                          ha_rows select_limit,
4042                          int *new_key, int *new_key_direction,
4043                          ha_rows *new_select_limit, uint *new_used_key_parts,
4044                          uint *saved_best_key_parts)
4045 {
4046   DBUG_ENTER("test_if_cheaper_ordering");
4047   /*
4048     Check whether there is an index compatible with the given order
4049     usage of which is cheaper than usage of the ref_key index (ref_key>=0)
4050     or a table scan.
4051     It may be the case if ORDER/GROUP BY is used with LIMIT.
4052   */
4053   ha_rows best_select_limit= HA_POS_ERROR;
4054   JOIN *join= tab ? tab->join() : NULL;
4055   if (join)
4056     ASSERT_BEST_REF_IN_JOIN_ORDER(join);
4057   uint nr;
4058   key_map keys;
4059   uint best_key_parts= 0;
4060   int best_key_direction= 0;
4061   ha_rows best_records= 0;
4062   double read_time;
4063   int best_key= -1;
4064   bool is_best_covering= FALSE;
4065   double fanout= 1;
4066   ha_rows table_records= table->file->stats.records;
4067   bool group= join && join->grouped && order == join->group_list;
4068   double refkey_rows_estimate= static_cast<double>(table->quick_condition_rows);
4069   const bool has_limit= (select_limit != HA_POS_ERROR);
4070   const join_type cur_access_method= tab ? tab->type() : JT_ALL;
4071 
4072   /*
4073     If not used with LIMIT, only use keys if the whole query can be
4074     resolved with a key;  This is because filesort() is usually faster than
4075     retrieving all rows through an index.
4076   */
4077   if (select_limit >= table_records)
4078   {
4079     keys= *table->file->keys_to_use_for_scanning();
4080     keys.merge(table->covering_keys);
4081 
4082     /*
4083       We are adding here also the index specified in FORCE INDEX clause,
4084       if any.
4085       This is to allow users to use index in ORDER BY.
4086     */
4087     if (table->force_index)
4088       keys.merge(group ? table->keys_in_use_for_group_by :
4089                          table->keys_in_use_for_order_by);
4090     keys.intersect(usable_keys);
4091   }
4092   else
4093     keys= usable_keys;
4094 
4095   if (join)
4096   {
4097     read_time= tab->position()->read_cost;
4098     for (uint jt= tab->idx() + 1; jt < join->primary_tables; jt++)
4099     {
4100       POSITION *pos= join->best_ref[jt]->position();
4101       fanout*= pos->rows_fetched * pos->filter_effect;
4102     }
4103   }
4104   else
4105     read_time= table->file->table_scan_cost().total_cost();
4106 
4107   /*
4108     Calculate the selectivity of the ref_key for REF_ACCESS. For
4109     RANGE_ACCESS we use table->quick_condition_rows.
4110   */
4111   if (ref_key >= 0 && cur_access_method == JT_REF)
4112   {
4113     if (table->quick_keys.is_set(ref_key))
4114       refkey_rows_estimate= static_cast<double>(table->quick_rows[ref_key]);
4115     else
4116     {
4117       const KEY *ref_keyinfo= table->key_info + ref_key;
4118       if (ref_keyinfo->has_records_per_key(tab->ref().key_parts - 1))
4119         refkey_rows_estimate=
4120           ref_keyinfo->records_per_key(tab->ref().key_parts - 1);
4121       else
4122         refkey_rows_estimate= 1.0;              // No index statistics
4123     }
4124     assert(refkey_rows_estimate >= 1.0);
4125   }
4126 
4127   for (nr=0; nr < table->s->keys ; nr++)
4128   {
4129     int direction;
4130     uint used_key_parts;
4131 
4132     if (keys.is_set(nr) &&
4133         (direction= test_if_order_by_key(order, table, nr, &used_key_parts)))
4134     {
4135       /*
4136         At this point we are sure that ref_key is a non-ordering
4137         key (where "ordering key" is a key that will return rows
4138         in the order required by ORDER BY).
4139       */
4140       assert (ref_key != (int) nr);
4141 
4142       bool is_covering= table->covering_keys.is_set(nr) ||
4143                         (nr == table->s->primary_key &&
4144                         table->file->primary_key_is_clustered());
4145 
4146       /*
4147         Don't use an index scan with ORDER BY without limit.
4148         For GROUP BY without limit always use index scan
4149         if there is a suitable index.
4150         Why we hold to this asymmetry hardly can be explained
4151         rationally. It's easy to demonstrate that using
4152         temporary table + filesort could be cheaper for grouping
4153         queries too.
4154       */
4155       if (is_covering ||
4156           select_limit != HA_POS_ERROR ||
4157           (ref_key < 0 && (group || table->force_index)))
4158       {
4159         rec_per_key_t rec_per_key;
4160         KEY *keyinfo= table->key_info+nr;
4161         if (select_limit == HA_POS_ERROR)
4162           select_limit= table_records;
4163         if (group)
4164         {
4165           /*
4166             Used_key_parts can be larger than keyinfo->key_parts
4167             when using a secondary index clustered with a primary
4168             key (e.g. as in Innodb).
4169             See Bug #28591 for details.
4170           */
4171           rec_per_key= used_key_parts &&
4172                        used_key_parts <= actual_key_parts(keyinfo) ?
4173                        keyinfo->records_per_key(used_key_parts - 1) : 1.0f;
4174           set_if_bigger(rec_per_key, 1.0f);
4175           /*
4176             With a grouping query each group containing on average
4177             rec_per_key records produces only one row that will
4178             be included into the result set.
4179           */
4180           if (select_limit > table_records/rec_per_key)
4181             select_limit= table_records;
4182           else
4183             select_limit= (ha_rows) (select_limit*rec_per_key);
4184         }
4185         /*
4186           If tab=tk is not the last joined table tn then to get first
4187           L records from the result set we can expect to retrieve
4188           only L/fanout(tk,tn) where fanout(tk,tn) says how many
4189           rows in the record set on average will match each row tk.
4190           Usually our estimates for fanouts are too pessimistic.
4191           So the estimate for L/fanout(tk,tn) will be too optimistic
4192           and as result we'll choose an index scan when using ref/range
4193           access + filesort will be cheaper.
4194         */
4195         select_limit= (ha_rows) (select_limit < fanout ?
4196                                  1 : select_limit/fanout);
4197         /*
4198           We assume that each of the tested indexes is not correlated
4199           with ref_key. Thus, to select first N records we have to scan
4200           N/selectivity(ref_key) index entries.
4201           selectivity(ref_key) = #scanned_records/#table_records =
4202           refkey_rows_estimate/table_records.
4203           In any case we can't select more than #table_records.
4204           N/(refkey_rows_estimate/table_records) > table_records
4205           <=> N > refkey_rows_estimate.
4206          */
4207         if (select_limit > refkey_rows_estimate)
4208           select_limit= table_records;
4209         else
4210           select_limit= (ha_rows) (select_limit *
4211                                    (double) table_records /
4212                                     refkey_rows_estimate);
4213         rec_per_key=
4214           keyinfo->records_per_key(keyinfo->user_defined_key_parts - 1);
4215         set_if_bigger(rec_per_key, 1.0f);
4216         /*
4217           Here we take into account the fact that rows are
4218           accessed in sequences rec_per_key records in each.
4219           Rows in such a sequence are supposed to be ordered
4220           by rowid/primary key. When reading the data
4221           in a sequence we'll touch not more pages than the
4222           table file contains.
4223           TODO. Use the formula for a disk sweep sequential access
4224           to calculate the cost of accessing data rows for one
4225           index entry.
4226         */
4227         const Cost_estimate table_scan_time= table->file->table_scan_cost();
4228         const double index_scan_time= select_limit / rec_per_key *
4229           min<double>(table->cost_model()->page_read_cost(rec_per_key),
4230                       table_scan_time.total_cost());
4231 
4232         /*
4233           Switch to index that gives order if its scan time is smaller than
4234           read_time of current chosen access method. In addition, if the
4235           current chosen access method is index scan or table scan, always
4236           switch to the index that gives order when it is covering or when
4237           force index or group by is present.
4238         */
4239         if (((cur_access_method == JT_ALL || cur_access_method == JT_INDEX_SCAN)
4240              && (is_covering || group || table->force_index)) ||
4241             index_scan_time < read_time)
4242         {
4243           ha_rows quick_records= table_records;
4244           const ha_rows refkey_select_limit=
4245             (ref_key >= 0 && table->covering_keys.is_set(ref_key)) ?
4246             static_cast<ha_rows>(refkey_rows_estimate) :
4247             HA_POS_ERROR;
4248 
4249           if ((is_best_covering && !is_covering) ||
4250               (is_covering && refkey_select_limit < select_limit))
4251             continue;
4252           if (table->quick_keys.is_set(nr))
4253             quick_records= table->quick_rows[nr];
4254           if (best_key < 0 ||
4255               (select_limit <= min(quick_records,best_records) ?
4256                keyinfo->user_defined_key_parts < best_key_parts :
4257                quick_records < best_records))
4258           {
4259             best_key= nr;
4260             best_key_parts= keyinfo->user_defined_key_parts;
4261             if (saved_best_key_parts)
4262               *saved_best_key_parts= used_key_parts;
4263             best_records= quick_records;
4264             is_best_covering= is_covering;
4265             best_key_direction= direction;
4266             best_select_limit= select_limit;
4267           }
4268         }
4269       }
4270     }
4271   }
4272 
4273   if (best_key < 0 || best_key == ref_key)
4274     DBUG_RETURN(FALSE);
4275 
4276   *new_key= best_key;
4277   *new_key_direction= best_key_direction;
4278   *new_select_limit= has_limit ? best_select_limit : table_records;
4279   if (new_used_key_parts != NULL)
4280     *new_used_key_parts= best_key_parts;
4281 
4282   DBUG_RETURN(TRUE);
4283 }
4284 
4285 
4286 /**
4287   Find a key to apply single table UPDATE/DELETE by a given ORDER
4288 
  @param       order           Linked list of ORDER BY arguments
  @param       tab             QEP_TAB of the table to find a key for; holds
                               the quick select (if any) to access/replace
  @param       limit           LIMIT clause parameter
  @param [out] need_sort       TRUE if filesort needed
  @param [out] reverse
    TRUE if the key is reversed against the given ORDER (undefined if key == MAX_KEY)
4296 
4297   @return
4298     - MAX_KEY if no key found                        (need_sort == TRUE)
4299     - MAX_KEY if quick select result order is OK     (need_sort == FALSE)
4300     - key number (either index scan or quick select) (need_sort == FALSE)
4301 
  @note
    Side effects:
    - may deallocate, or deallocate and replace, the table's quick select;
    - may set table->quick_condition_rows and table->quick_rows[...]
      to table->file->stats.records.
4307 */
4308 
get_index_for_order(ORDER * order,QEP_TAB * tab,ha_rows limit,bool * need_sort,bool * reverse)4309 uint get_index_for_order(ORDER *order, QEP_TAB *tab,
4310                          ha_rows limit, bool *need_sort, bool *reverse)
4311 {
4312   if (tab->quick() && tab->quick()->unique_key_range())
4313   { // Single row select (always "ordered"): Ok to use with key field UPDATE
4314     *need_sort= FALSE;
4315     /*
4316       Returning of MAX_KEY here prevents updating of used_key_is_modified
4317       in mysql_update(). Use quick select "as is".
4318     */
4319     return MAX_KEY;
4320   }
4321 
4322   TABLE *const table= tab->table();
4323 
4324   if (!order)
4325   {
4326     *need_sort= FALSE;
4327     if (tab->quick())
4328       return tab->quick()->index; // index or MAX_KEY, use quick select as is
4329     else
4330       return table->file->key_used_on_scan; // MAX_KEY or index for some engines
4331   }
4332 
4333   if (!is_simple_order(order)) // just to cut further expensive checks
4334   {
4335     *need_sort= TRUE;
4336     return MAX_KEY;
4337   }
4338 
4339   if (tab->quick())
4340   {
4341     if (tab->quick()->index == MAX_KEY)
4342     {
4343       *need_sort= TRUE;
4344       return MAX_KEY;
4345     }
4346 
4347     uint used_key_parts;
4348     switch (test_if_order_by_key(order, table, tab->quick()->index,
4349                                  &used_key_parts)) {
4350     case 1: // desired order
4351       *need_sort= FALSE;
4352       return tab->quick()->index;
4353     case 0: // unacceptable order
4354       *need_sort= TRUE;
4355       return MAX_KEY;
4356     case -1: // desired order, but opposite direction
4357       {
4358         QUICK_SELECT_I *reverse_quick;
4359         if ((reverse_quick=
4360                tab->quick()->make_reverse(used_key_parts)))
4361         {
4362           delete tab->quick();
4363           tab->set_quick(reverse_quick);
4364           tab->set_type(calc_join_type(reverse_quick->get_type()));
4365           *need_sort= FALSE;
4366           return reverse_quick->index;
4367         }
4368         else
4369         {
4370           *need_sort= TRUE;
4371           return MAX_KEY;
4372         }
4373       }
4374     }
4375     assert(0);
4376   }
4377   else if (limit != HA_POS_ERROR)
4378   { // check if some index scan & LIMIT is more efficient than filesort
4379 
4380     /*
4381       Update quick_condition_rows since single table UPDATE/DELETE procedures
4382       don't call JOIN::make_join_plan() and leave this variable uninitialized.
4383     */
4384     table->quick_condition_rows= table->file->stats.records;
4385 
4386     int key, direction;
4387     if (test_if_cheaper_ordering(NULL, order, table,
4388                                  table->keys_in_use_for_order_by, -1,
4389                                  limit,
4390                                  &key, &direction, &limit))
4391     {
4392       *need_sort= FALSE;
4393       *reverse= (direction < 0);
4394       return key;
4395     }
4396   }
4397   *need_sort= TRUE;
4398   return MAX_KEY;
4399 }
4400 
4401 
4402 /**
4403   Returns number of key parts depending on
4404   OPTIMIZER_SWITCH_USE_INDEX_EXTENSIONS flag.
4405 
4406   @param  key_info  pointer to KEY structure
4407 
4408   @return number of key parts.
4409 */
4410 
actual_key_parts(const KEY * key_info)4411 uint actual_key_parts(const KEY *key_info)
4412 {
4413   return key_info->table->in_use->
4414     optimizer_switch_flag(OPTIMIZER_SWITCH_USE_INDEX_EXTENSIONS) ?
4415     key_info->actual_key_parts : key_info->user_defined_key_parts;
4416 }
4417 
4418 
4419 /**
4420   Returns key flags depending on
4421   OPTIMIZER_SWITCH_USE_INDEX_EXTENSIONS flag.
4422 
4423   @param  key_info  pointer to KEY structure
4424 
4425   @return key flags.
4426 */
4427 
actual_key_flags(KEY * key_info)4428 uint actual_key_flags(KEY *key_info)
4429 {
4430   return key_info->table->in_use->
4431     optimizer_switch_flag(OPTIMIZER_SWITCH_USE_INDEX_EXTENSIONS) ?
4432     key_info->actual_flags : key_info->flags;
4433 }
4434 
4435 
calc_join_type(int quick_type)4436 join_type calc_join_type(int quick_type)
4437 {
4438   if ((quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) ||
4439       (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) ||
4440       (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION))
4441     return JT_INDEX_MERGE;
4442   else
4443     return JT_RANGE;
4444 }
4445 
4446 
4447 /**
4448   @} (end of group Query_Optimizer)
4449 */
4450