1 /* Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23 /**
24 @file
25
26 @brief
27 mysql_select and join optimization
28
29
30 @defgroup Query_Optimizer Query Optimizer
31 @{
32 */
33
34 #include "sql_priv.h"
35 #include "sql_select.h"
36 #include "sql_table.h" // primary_key_name
37 #include "sql_derived.h"
38 #include "probes_mysql.h"
39 #include "opt_trace.h"
40 #include "key.h" // key_copy, key_cmp, key_cmp_if_same
41 #include "lock.h" // mysql_unlock_some_tables,
42 // mysql_unlock_read_tables
43 #include "sql_show.h" // append_identifier
44 #include "sql_base.h" // setup_wild, setup_fields, fill_record
45 #include "sql_acl.h" // *_ACL
46 #include "sql_test.h" // misc. debug printing utilities
47 #include "records.h" // init_read_record, end_read_record
48 #include "filesort.h" // filesort_free_buffers
49 #include "sql_union.h" // mysql_union
50 #include "opt_explain.h"
51 #include "sql_join_buffer.h" // JOIN_CACHE
52 #include "sql_optimizer.h" // JOIN
53 #include "sql_tmp_table.h" // tmp tables
54
55 #include <algorithm>
56 using std::max;
57 using std::min;
58
59 static store_key *get_store_key(THD *thd,
60 Key_use *keyuse, table_map used_tables,
61 KEY_PART_INFO *key_part, uchar *key_buff,
62 uint maybe_null);
63 bool const_expression_in_where(Item *conds,Item *item, Item **comp_item);
64 uint find_shortest_key(TABLE *table, const key_map *usable_keys);
65 static bool test_if_cheaper_ordering(const JOIN_TAB *tab,
66 ORDER *order, TABLE *table,
67 key_map usable_keys, int key,
68 ha_rows select_limit,
69 int *new_key, int *new_key_direction,
70 ha_rows *new_select_limit,
71 uint *new_used_key_parts= NULL,
72 uint *saved_best_key_parts= NULL);
73 static uint join_buffer_alg(const THD *thd);
74 static void push_index_cond(JOIN_TAB *tab, uint keyno, bool other_tbls_ok,
75 Opt_trace_object *trace_obj);
76
77 /**
78 This handles SELECT with and without UNION
79 */
80
handle_select(THD * thd,select_result * result,ulong setup_tables_done_option)81 bool handle_select(THD *thd, select_result *result,
82 ulong setup_tables_done_option)
83 {
84 bool res;
85 LEX *lex= thd->lex;
86 SELECT_LEX *select_lex = &lex->select_lex;
87 DBUG_ENTER("handle_select");
88 MYSQL_SELECT_START(thd->query());
89
90 if (lex->proc_analyse && lex->sql_command != SQLCOM_SELECT)
91 {
92 my_error(ER_WRONG_USAGE, MYF(0), "PROCEDURE", "non-SELECT");
93 DBUG_RETURN(true);
94 }
95
96 if (select_lex->master_unit()->is_union() ||
97 select_lex->master_unit()->fake_select_lex)
98 res= mysql_union(thd, lex, result, &lex->unit, setup_tables_done_option);
99 else
100 {
101 SELECT_LEX_UNIT *unit= &lex->unit;
102 unit->set_limit(unit->global_parameters);
103 /*
104 'options' of mysql_select will be set in JOIN, as far as JOIN for
105 every PS/SP execution new, we will not need reset this flag if
106 setup_tables_done_option changed for next rexecution
107 */
108 res= mysql_select(thd,
109 select_lex->table_list.first,
110 select_lex->with_wild, select_lex->item_list,
111 select_lex->where,
112 &select_lex->order_list,
113 &select_lex->group_list,
114 select_lex->having,
115 select_lex->options | thd->variables.option_bits |
116 setup_tables_done_option,
117 result, unit, select_lex);
118 }
119 DBUG_PRINT("info",("res: %d report_error: %d", res,
120 thd->is_error()));
121 res|= thd->is_error();
122 if (unlikely(res))
123 result->abort_result_set();
124
125 MYSQL_SELECT_DONE((int) res, (ulong) thd->limit_found_rows);
126 DBUG_RETURN(res);
127 }
128
129
130 /*****************************************************************************
131 Check fields, find best join, do the select and output fields.
132 All tables must be opened.
133 *****************************************************************************/
134
135 /**
136 @brief Check if two items are compatible wrt. materialization.
137
138 @param outer Expression from outer query
139 @param inner Expression from inner query
140
141 @retval TRUE If subquery types allow materialization.
142 @retval FALSE Otherwise.
143 */
144
types_allow_materialization(Item * outer,Item * inner)145 bool types_allow_materialization(Item *outer, Item *inner)
146
147 {
148 if (outer->result_type() != inner->result_type())
149 return FALSE;
150 switch (outer->result_type()) {
151 case STRING_RESULT:
152 if (outer->is_temporal_with_date() != inner->is_temporal_with_date())
153 return FALSE;
154 if (!(outer->collation.collation == inner->collation.collation
155 /*&& outer->max_length <= inner->max_length */))
156 return FALSE;
157 /*case INT_RESULT:
158 if (!(outer->unsigned_flag ^ inner->unsigned_flag))
159 return FALSE; */
160 default:
161 ; /* suitable for materialization */
162 }
163 return TRUE;
164 }
165
166
167 /*
168 Check if the table's rowid is included in the temptable
169
170 SYNOPSIS
171 sj_table_is_included()
172 join The join
173 join_tab The table to be checked
174
175 DESCRIPTION
176 SemiJoinDuplicateElimination: check the table's rowid should be included
177 in the temptable. This is so if
178
179 1. The table is not embedded within some semi-join nest
180 2. The has been pulled out of a semi-join nest, or
181
182 3. The table is functionally dependent on some previous table
183
184 [4. This is also true for constant tables that can't be
185 NULL-complemented but this function is not called for such tables]
186
187 RETURN
188 TRUE - Include table's rowid
189 FALSE - Don't
190 */
191
sj_table_is_included(JOIN * join,JOIN_TAB * join_tab)192 static bool sj_table_is_included(JOIN *join, JOIN_TAB *join_tab)
193 {
194 if (join_tab->emb_sj_nest)
195 return FALSE;
196
197 /* Check if this table is functionally dependent on the tables that
198 are within the same outer join nest
199 */
200 TABLE_LIST *embedding= join_tab->table->pos_in_table_list->embedding;
201 if (join_tab->type == JT_EQ_REF)
202 {
203 table_map depends_on= 0;
204 uint idx;
205
206 for (uint kp= 0; kp < join_tab->ref.key_parts; kp++)
207 depends_on |= join_tab->ref.items[kp]->used_tables();
208
209 Table_map_iterator it(depends_on & ~PSEUDO_TABLE_BITS);
210 while ((idx= it.next_bit())!=Table_map_iterator::BITMAP_END)
211 {
212 JOIN_TAB *ref_tab= join->map2table[idx];
213 if (embedding != ref_tab->table->pos_in_table_list->embedding)
214 return TRUE;
215 }
216 /* Ok, functionally dependent */
217 return FALSE;
218 }
219 /* Not functionally dependent => need to include*/
220 return TRUE;
221 }
222
223 /**
224 Check if the optimizer might choose to use join buffering for this
225 join. If that is the case, and if duplicate weedout semijoin
226 strategy is used, the duplicate generating range must be extended
227 to the first non-const table.
228
229 This function is called from setup_semijoin_dups_elimination()
230 before the final decision is made on whether or not buffering is
231 used. It is therefore only a rough test that covers all cases where
232 join buffering might be used, but potentially also some cases where
233 join buffering will not be used.
234
235 @param join_buffer_alg Bitmap with possible join buffer algorithms
236 @param sj_tab Table that might be joined by BNL/BKA
237
238 @return
239 true if join buffering might be used, false otherwise
240
241 */
might_do_join_buffering(uint join_buffer_alg,const JOIN_TAB * sj_tab)242 static bool might_do_join_buffering(uint join_buffer_alg,
243 const JOIN_TAB *sj_tab)
244 {
245 /*
246 (1) sj_tab is not a const table
247 */
248 int sj_tabno= sj_tab - sj_tab->join->join_tab;
249 return (sj_tabno >= (int)sj_tab->join->const_tables && // (1)
250 sj_tab->use_quick != QS_DYNAMIC_RANGE &&
251 (((join_buffer_alg & JOIN_CACHE::ALG_BNL) &&
252 sj_tab->type == JT_ALL) ||
253 ((join_buffer_alg &
254 (JOIN_CACHE::ALG_BKA | JOIN_CACHE::ALG_BKA_UNIQUE)) &&
255 (sj_tab->type == JT_REF ||
256 sj_tab->type == JT_EQ_REF ||
257 sj_tab->type == JT_CONST))));
258 }
259
260 /**
261 Setup the strategies to eliminate semi-join duplicates.
262
263 @param join Join to process
264 @param options Join options (needed to see if join buffering will be
265 used or not)
266 @param no_jbuf_after Do not use join buffering after the table with this
267 number
268
269 @retval FALSE OK
270 @retval TRUE Out of memory error
271
272 @details
273 Setup the strategies to eliminate semi-join duplicates.
274 At the moment there are 5 strategies:
275
276 1. DuplicateWeedout (use of temptable to remove duplicates based on rowids
277 of row combinations)
278 2. FirstMatch (pick only the 1st matching row combination of inner tables)
279 3. LooseScan (scanning the sj-inner table in a way that groups duplicates
280 together and picking the 1st one)
281 4. MaterializeLookup (Materialize inner tables, then setup a scan over
282 outer correlated tables, lookup in materialized table)
283 5. MaterializeScan (Materialize inner tables, then setup a scan over
284 materialized tables, perform lookup in outer tables)
285
286 The join order has "duplicate-generating ranges", and every range is
287 served by one strategy or a combination of FirstMatch with with some
288 other strategy.
289
290 "Duplicate-generating range" is defined as a range within the join order
291 that contains all of the inner tables of a semi-join. All ranges must be
292 disjoint, if tables of several semi-joins are interleaved, then the ranges
293 are joined together, which is equivalent to converting
294 SELECT ... WHERE oe1 IN (SELECT ie1 ...) AND oe2 IN (SELECT ie2 )
295 to
296 SELECT ... WHERE (oe1, oe2) IN (SELECT ie1, ie2 ... ...)
297 .
298
299 Applicability conditions are as follows:
300
301 DuplicateWeedout strategy
302 ~~~~~~~~~~~~~~~~~~~~~~~~~
303
304 (ot|nt)* [ it ((it|ot|nt)* (it|ot))] (nt)*
305 +------+ +=========================+ +---+
306 (1) (2) (3)
307
308 (1) - Prefix of OuterTables (those that participate in
309 IN-equality and/or are correlated with subquery) and outer
310 Non-correlated tables.
311 (2) - The handled range. The range starts with the first sj-inner
312 table, and covers all sj-inner and outer tables
313 Within the range, Inner, Outer, outer non-correlated tables
314 may follow in any order.
315 (3) - The suffix of outer non-correlated tables.
316
317 FirstMatch strategy
318 ~~~~~~~~~~~~~~~~~~~
319
320 (ot|nt)* [ it ((it|nt)* it) ] (nt)*
321 +------+ +==================+ +---+
322 (1) (2) (3)
323
324 (1) - Prefix of outer correlated and non-correlated tables
325 (2) - The handled range, which may contain only inner and
326 non-correlated tables.
327 (3) - The suffix of outer non-correlated tables.
328
329 LooseScan strategy
330 ~~~~~~~~~~~~~~~~~~
331
332 (ot|ct|nt) [ loosescan_tbl (ot|nt|it)* it ] (ot|nt)*
333 +--------+ +===========+ +=============+ +------+
334 (1) (2) (3) (4)
335
336 (1) - Prefix that may contain any outer tables. The prefix must contain
337 all the non-trivially correlated outer tables. (non-trivially means
338 that the correlation is not just through the IN-equality).
339
340 (2) - Inner table for which the LooseScan scan is performed.
341 Notice that special requirements for existence of certain indexes
342 apply to this table, @see class Loose_scan_opt.
343
344 (3) - The remainder of the duplicate-generating range. It is served by
345 application of FirstMatch strategy. Outer IN-correlated tables
346 must be correlated to the LooseScan table but not to the inner
347 tables in this range. (Currently, there can be no outer tables
348 in this range because of implementation restrictions,
349 @see Optimize_table_order::advance_sj_state()).
350
351 (4) - The suffix of outer correlated and non-correlated tables.
352
353 MaterializeLookup strategy
354 ~~~~~~~~~~~~~~~~~~~~~~~~~~
355
356 (ot|nt)* [ it (it)* ] (nt)*
357 +------+ +==========+ +---+
358 (1) (2) (3)
359
360 (1) - Prefix of outer correlated and non-correlated tables.
361
362 (2) - The handled range, which may contain only inner tables.
363 The inner tables are materialized in a temporary table that is
364 later used as a lookup structure for the outer correlated tables.
365
366 (3) - The suffix of outer non-correlated tables.
367
368 MaterializeScan strategy
369 ~~~~~~~~~~~~~~~~~~~~~~~~~~
370
371 (ot|nt)* [ it (it)* ] (ot|nt)*
372 +------+ +==========+ +-----+
373 (1) (2) (3)
374
375 (1) - Prefix of outer correlated and non-correlated tables.
376
377 (2) - The handled range, which may contain only inner tables.
378 The inner tables are materialized in a temporary table which is
379 later used to setup a scan.
380
381 (3) - The suffix of outer correlated and non-correlated tables.
382
383 Note that MaterializeLookup and MaterializeScan has overlap in their patterns.
384 It may be possible to consolidate the materialization strategies into one.
385
386 The choice between the strategies is made by the join optimizer (see
387 advance_sj_state() and fix_semijoin_strategies()).
388 This function sets up all fields/structures/etc needed for execution except
389 for setup/initialization of semi-join materialization which is done in
390 setup_materialized_table().
391 */
392
static bool setup_semijoin_dups_elimination(JOIN *join, ulonglong options,
                                            uint no_jbuf_after)
{
  uint tableno;
  THD *thd= join->thd;
  DBUG_ENTER("setup_semijoin_dups_elimination");

  // Nothing to set up if the query has no semi-join nests.
  if (join->select_lex->sj_nests.is_empty())
    DBUG_RETURN(FALSE);

  /*
    Walk the join order, skipping const tables. Each iteration handles one
    duplicate-generating range (pos->n_sj_tables tables) and advances
    tableno past it.
  */
  for (tableno= join->const_tables; tableno < join->primary_tables; )
  {
    JOIN_TAB *const tab= join->join_tab + tableno;   // First table of range
    POSITION *const pos= tab->position;
    uint keylen, keyno;
    if (pos->sj_strategy == SJ_OPT_NONE)
    {
      tableno++;  // nothing to do
      continue;
    }
    // Last table of the duplicate-generating range:
    JOIN_TAB *last_sj_tab= tab + pos->n_sj_tables - 1;
    switch (pos->sj_strategy) {
    case SJ_OPT_MATERIALIZE_LOOKUP:
    case SJ_OPT_MATERIALIZE_SCAN:
      DBUG_ASSERT(false); // Should not occur among "primary" tables
      // Do nothing
      tableno+= pos->n_sj_tables;
      break;
    case SJ_OPT_LOOSE_SCAN:
    {
      DBUG_ASSERT(tab->emb_sj_nest != NULL); // First table must be inner
      /* We jump from the last table to the first one */
      tab->match_tab= last_sj_tab;

      /* For LooseScan, duplicate elimination is based on rows being sorted
         on key. We need to make sure that range select keeps the sorted index
         order. (When using MRR it may not.)

         Note: need_sorted_output() implementations for range select classes
         that do not support sorted output, will trigger an assert. This
         should not happen since LooseScan strategy is only picked if sorted
         output is supported.
      */
      if (tab->select && tab->select->quick)
      {
        if (tab->select->quick->index == pos->loosescan_key)
          tab->select->quick->need_sorted_output();
        else
          tab->select->set_quick(NULL);  // Quick select uses another index
      }
      /* Calculate key length: sum of store lengths of the used key parts */
      keylen= 0;
      keyno= pos->loosescan_key;
      for (uint kp=0; kp < pos->loosescan_parts; kp++)
        keylen += tab->table->key_info[keyno].key_part[kp].store_length;

      tab->loosescan_key_len= keylen;
      if (pos->n_sj_tables > 1)
      {
        // Remaining range tables are handled FirstMatch-style:
        last_sj_tab->firstmatch_return= tab;
        last_sj_tab->match_tab= last_sj_tab;
      }
      tableno+= pos->n_sj_tables;
      break;
    }
    case SJ_OPT_DUPS_WEEDOUT:
    {
      DBUG_ASSERT(tab->emb_sj_nest != NULL); // First table must be inner
      /*
        Consider a semijoin of one outer and one inner table, both
        with two rows. The inner table is assumed to be confluent
        (See sj_opt_materialize_lookup)

        If normal nested loop execution is used, we do not need to
        include semi-join outer table rowids in the duplicate
        weedout temp table since NL guarantees that outer table rows
        are encountered only consecutively and because all rows in
        the temp table are deleted for every new outer table
        combination (example is with a confluent inner table):

          ot1.row1|it1.row1
                 '-> temp table's have_confluent_row == FALSE
                   |-> output ot1.row1
                   '-> set have_confluent_row= TRUE
          ot1.row1|it1.row2
                 |-> temp table's have_confluent_row == TRUE
                 | '-> do not output ot1.row1
                 '-> no more join matches - set have_confluent_row= FALSE
          ot1.row2|it1.row1
                 '-> temp table's have_confluent_row == FALSE
                   |-> output ot1.row2
                   '-> set have_confluent_row= TRUE
          ...

        Note: not having outer table rowids in the temp table and
        then emptying the temp table when a new outer table row
        combinition is encountered is an optimization. Including
        outer table rowids in the temp table is not harmful but
        wastes memory.

        Now consider the join buffering algorithms (BNL/BKA). These
        algorithms join each inner row with outer rows in "reverse"
        order compared to NL. Effectively, this means that outer
        table rows may be encountered multiple times in a
        non-consecutive manner:

          NL:                 BNL/BKA:
          ot1.row1|it1.row1   ot1.row1|it1.row1
          ot1.row1|it1.row2   ot1.row2|it1.row1
          ot1.row2|it1.row1   ot1.row1|it1.row2
          ot1.row2|it1.row2   ot1.row2|it1.row2

        It is clear from the above that there is no place we can
        empty the temp table like we do in NL to avoid storing outer
        table rowids.

        Below we check if join buffering might be used. If so, set
        first_table to the first non-constant table so that outer
        table rowids are included in the temp table. Do not destroy
        other duplicate elimination methods.
      */
      uint first_table= tableno;
      for (uint sj_tableno= tableno;
           sj_tableno < tableno + pos->n_sj_tables;
           sj_tableno++)
      {
        /*
          The final decision on whether or not join buffering will
          be used is taken in setup_join_buffering(), which is
          called from make_join_readinfo()'s main loop.
          setup_join_buffering() needs to know if duplicate weedout is used,
          so moving setup_semijoin_dups_elimination() from before the main
          loop to after it is not possible. I.e.,
          join->join_tab[sj_tableno]->position->use_join_buffer is not
          trustworthy at this point.
        */
        /**
          @todo: merge make_join_readinfo() and
          setup_semijoin_dups_elimination() loops and change the
          following 'if' to

          "if (join->join_tab[sj_tableno]->position->use_join_buffer &&
               sj_tableno <= no_jbuf_after)".

          For now, use a rough criteria:
        */

        if (sj_tableno <= no_jbuf_after &&
            might_do_join_buffering(join_buffer_alg(thd),
                                    join->join_tab + sj_tableno))

        {
          /* Join buffering will probably be used */
          first_table= join->const_tables;
          break;
        }
      }

      JOIN_TAB *const first_sj_tab= join->join_tab + first_table;
      if (last_sj_tab->first_inner != NULL &&
          first_sj_tab->first_inner != last_sj_tab->first_inner)
      {
        /*
          The first duplicate weedout table is an outer table of an outer join
          and the last duplicate weedout table is one of the inner tables of
          the outer join.
          In this case, we must assure that all the inner tables of the
          outer join are part of the duplicate weedout operation.
          This is to assure that NULL-extension for inner tables of an
          outer join is performed before duplicate elimination is performed,
          otherwise we will have extra NULL-extended rows being output, which
          should have been eliminated as duplicates.
        */
        JOIN_TAB *tab= last_sj_tab->first_inner;
        /*
          First, locate the table that is the first inner table of the
          outer join operation that first_sj_tab is outer for.
        */
        while (tab->first_upper != NULL &&
               tab->first_upper != first_sj_tab->first_inner)
          tab= tab->first_upper;
        // Then, extend the range with all inner tables of the join nest:
        if (tab->first_inner->last_inner > last_sj_tab)
          last_sj_tab= tab->first_inner->last_inner;
      }

      SJ_TMP_TABLE::TAB sjtabs[MAX_TABLES];
      SJ_TMP_TABLE::TAB *last_tab= sjtabs;
      uint jt_rowid_offset= 0; // # tuple bytes are already occupied (w/o NULL bytes)
      uint jt_null_bits= 0;    // # null bits in tuple bytes
      /*
        Walk through the range and remember
        - tables that need their rowids to be put into temptable
        - the last outer table
      */
      for (JOIN_TAB *tab_in_range= join->join_tab + first_table;
           tab_in_range <= last_sj_tab;
           tab_in_range++)
      {
        if (sj_table_is_included(join, tab_in_range))
        {
          // Record where this table's rowid goes within the weedout tuple:
          last_tab->join_tab= tab_in_range;
          last_tab->rowid_offset= jt_rowid_offset;
          jt_rowid_offset += tab_in_range->table->file->ref_length;
          if (tab_in_range->table->maybe_null)
          {
            last_tab->null_byte= jt_null_bits / 8;
            last_tab->null_bit= jt_null_bits++;
          }
          last_tab++;
          // Handler must be able to report row positions (rowids):
          tab_in_range->table->prepare_for_position();
          tab_in_range->keep_current_rowid= TRUE;
        }
      }

      SJ_TMP_TABLE *sjtbl;
      if (jt_rowid_offset) /* Temptable has at least one rowid */
      {
        uint tabs_size= (last_tab - sjtabs) * sizeof(SJ_TMP_TABLE::TAB);
        if (!(sjtbl= new (thd->mem_root) SJ_TMP_TABLE) ||
            !(sjtbl->tabs= (SJ_TMP_TABLE::TAB*) thd->alloc(tabs_size)))
          DBUG_RETURN(TRUE); /* purecov: inspected */
        memcpy(sjtbl->tabs, sjtabs, tabs_size);
        sjtbl->is_confluent= FALSE;
        sjtbl->tabs_end= sjtbl->tabs + (last_tab - sjtabs);
        sjtbl->rowid_len= jt_rowid_offset;
        sjtbl->null_bits= jt_null_bits;
        sjtbl->null_bytes= (jt_null_bits + 7)/8;
        sjtbl->tmp_table=
          create_duplicate_weedout_tmp_table(thd,
                                             sjtbl->rowid_len +
                                             sjtbl->null_bytes,
                                             sjtbl);
        join->sj_tmp_tables.push_back(sjtbl->tmp_table);
      }
      else
      {
        /*
          This is confluent case where the entire subquery predicate does
          not depend on anything at all, ie this is
            WHERE const IN (uncorrelated select)
        */
        if (!(sjtbl= new (thd->mem_root) SJ_TMP_TABLE))
          DBUG_RETURN(TRUE); /* purecov: inspected */
        sjtbl->tmp_table= NULL;
        sjtbl->is_confluent= TRUE;
        sjtbl->have_confluent_row= FALSE;
      }
      // Flush the weedout table at range start, probe/insert at range end:
      join->join_tab[first_table].flush_weedout_table= sjtbl;
      last_sj_tab->check_weed_out_table= sjtbl;

      tableno+= pos->n_sj_tables;
      break;
    }
    case SJ_OPT_FIRST_MATCH:
    {
      /*
        Setup a "jump" from the last table in the range of inner tables
        to the last outer table before the inner tables.
        If there are outer tables inbetween the inner tables, we have to
        setup a "split jump": Jump from the last inner table to the last
        outer table within the range, then from the last inner table
        before the outer table(s), jump to the last outer table before
        this range of inner tables, etc.
      */
      JOIN_TAB *jump_to= tab - 1;
      DBUG_ASSERT(tab->emb_sj_nest != NULL); // First table must be inner
      for (JOIN_TAB *tab_in_range= tab;
           tab_in_range <= last_sj_tab;
           tab_in_range++)
      {
        if (!tab_in_range->emb_sj_nest)
        {
          /*
            Let last non-correlated table be jump target for
            subsequent inner tables.
          */
          jump_to= tab_in_range;
        }
        else
        {
          /*
            Assign jump target for last table in a consecutive range of
            inner tables.
          */
          if (tab_in_range == last_sj_tab || !(tab_in_range+1)->emb_sj_nest)
          {
            tab_in_range->firstmatch_return= jump_to;
            tab_in_range->match_tab= last_sj_tab;
          }
        }
      }
      tableno+= pos->n_sj_tables;
      break;
    }
    }
  }
  DBUG_RETURN(FALSE);
}
692
693
694 /*
695 Destroy all temporary tables created by NL-semijoin runtime
696 */
697
destroy_sj_tmp_tables(JOIN * join)698 static void destroy_sj_tmp_tables(JOIN *join)
699 {
700 List_iterator<TABLE> it(join->sj_tmp_tables);
701 TABLE *table;
702 while ((table= it++))
703 {
704 /*
705 SJ-Materialization tables are initialized for either sequential reading
706 or index lookup, DuplicateWeedout tables are not initialized for read
707 (we only write to them), so need to call ha_index_or_rnd_end.
708 */
709 table->file->ha_index_or_rnd_end();
710 free_tmp_table(join->thd, table);
711 }
712 join->sj_tmp_tables.empty();
713 }
714
715
716 /**
717 Remove all rows from all temp tables used by NL-semijoin runtime
718
719 @param join The join to remove tables for
720
721 All rows must be removed from all temporary tables before every join
722 re-execution.
723 */
724
clear_sj_tmp_tables(JOIN * join)725 static int clear_sj_tmp_tables(JOIN *join)
726 {
727 int res;
728 List_iterator<TABLE> it(join->sj_tmp_tables);
729 TABLE *table;
730 while ((table= it++))
731 {
732 if ((res= table->file->ha_delete_all_rows()))
733 return res; /* purecov: inspected */
734 }
735 Semijoin_mat_exec *sjm;
736 List_iterator<Semijoin_mat_exec> it2(join->sjm_exec_list);
737 while ((sjm= it2++))
738 {
739 JOIN_TAB *const tab= join->join_tab + sjm->mat_table_index;
740 DBUG_ASSERT(tab->materialize_table);
741 tab->materialized= false;
742 // The materialized table must be re-read on next evaluation:
743 tab->table->status= STATUS_GARBAGE | STATUS_NOT_FOUND;
744 }
745 return 0;
746 }
747
748
/**
  Reset the state of this join object so that it is ready for a
  new execution: re-evaluates OFFSET, empties internal temporary tables,
  restores the items ref array, and clears cached ref-access and
  aggregate-function state.
*/

void JOIN::reset()
{
  DBUG_ENTER("JOIN::reset");

  // Re-evaluate OFFSET; it may be a parameter re-bound per PS/SP execution.
  unit->offset_limit_cnt= (ha_rows)(select_lex->offset_limit ?
                                    select_lex->offset_limit->val_uint() :
                                    ULL(0));

  first_record= false;
  group_sent= false;

  if (tmp_tables)
  {
    // Empty all internal temporary tables (they follow the primary tables
    // in the join_tab array) and release their I/O and sort buffers.
    for (uint tmp= primary_tables; tmp < primary_tables + tmp_tables; tmp++)
    {
      TABLE *tmp_table= join_tab[tmp].table;
      if (!tmp_table->is_created())
        continue;  // Never instantiated: nothing to reset
      tmp_table->file->extra(HA_EXTRA_RESET_STATE);
      tmp_table->file->ha_delete_all_rows();
      free_io_cache(tmp_table);
      filesort_free_buffers(tmp_table,0);
    }
  }
  // Empty semi-join temp tables and force re-materialization.
  clear_sj_tmp_tables(this);
  if (current_ref_ptrs != items0)
  {
    // Restore the ref array to its pre-execution state.
    set_items_ref_array(items0);
    set_group_rpa= false;
  }

  /* need to reset ref access state (see join_read_key) */
  if (join_tab)
    for (uint i= 0; i < tables; i++)
      join_tab[i].ref.key_err= TRUE;

  /* Reset of sum functions */
  if (sum_funcs)
  {
    Item_sum *func, **func_ptr= sum_funcs;
    while ((func= *(func_ptr++)))
      func->clear();
  }

  // Re-initialize full-text functions unless we are only describing the plan.
  if (!(select_options & SELECT_DESCRIBE) && select_lex->has_ft_funcs())
    (void) init_ftfuncs(thd, select_lex, MY_TEST(order));

  DBUG_VOID_RETURN;
}
803
804
805 /**
806 Prepare join result.
807
808 @details Prepare join result prior to join execution or describing.
809 Instantiate derived tables and get schema tables result if necessary.
810
811 @return
812 TRUE An error during derived or schema tables instantiation.
813 FALSE Ok
814 */
815
prepare_result(List<Item> ** columns_list)816 bool JOIN::prepare_result(List<Item> **columns_list)
817 {
818 DBUG_ENTER("JOIN::prepare_result");
819
820 error= 0;
821 /* Create result tables for materialized views. */
822 if (!zero_result_cause &&
823 select_lex->handle_derived(thd->lex, &mysql_derived_create))
824 goto err;
825
826 if (result->prepare2())
827 goto err;
828
829 if ((select_lex->options & OPTION_SCHEMA_TABLE) &&
830 get_schema_tables_result(this, PROCESSED_BY_JOIN_EXEC))
831 goto err;
832
833 DBUG_RETURN(FALSE);
834
835 err:
836 error= 1;
837 DBUG_RETURN(TRUE);
838 }
839
840
841 /**
842 Explain join.
843 */
844
845 bool
explain()846 JOIN::explain()
847 {
848 Opt_trace_context * const trace= &thd->opt_trace;
849 Opt_trace_object trace_wrapper(trace);
850 Opt_trace_object trace_exec(trace, "join_explain");
851 trace_exec.add_select_number(select_lex->select_number);
852 Opt_trace_array trace_steps(trace, "steps");
853 List<Item> *columns_list= &fields_list;
854 bool ret;
855 DBUG_ENTER("JOIN::explain");
856
857 THD_STAGE_INFO(thd, stage_explaining);
858
859 if (prepare_result(&columns_list))
860 DBUG_RETURN(true);
861
862 if (!tables_list && (tables || !select_lex->with_sum_func))
863 { // Only test of functions
864 ret= explain_no_table(thd, this, zero_result_cause ? zero_result_cause
865 : "No tables used");
866 /* Single select (without union) always returns 0 or 1 row */
867 thd->limit_found_rows= send_records;
868 thd->set_examined_row_count(0);
869 DBUG_RETURN(ret);
870 }
871 /*
872 Don't reset the found rows count if there're no tables as
873 FOUND_ROWS() may be called. Never reset the examined row count here.
874 It must be accumulated from all join iterations of all join parts.
875 */
876 if (tables)
877 thd->limit_found_rows= 0;
878
879 if (zero_result_cause)
880 {
881 ret= explain_no_table(thd, this, zero_result_cause);
882 DBUG_RETURN(ret);
883 }
884
885 if (tables)
886 ret= explain_query_specification(thd, this);
887 else
888 ret= explain_no_table(thd, this, "No tables used");
889
890 DBUG_RETURN(ret);
891 }
892
893
/**
  Clean up and destroy join object.

  Frees internal temporary tables and their QEP operations, cleans up
  items that reference temporary-table columns, and destroys semi-join
  runtime structures.

  @return false if previous execution was successful, and true otherwise
*/

bool JOIN::destroy()
{
  DBUG_ENTER("JOIN::destroy");
  select_lex->join= 0;

  cond_equal= 0;

  cleanup(1);  // Full cleanup, including execution-time buffers

  if (join_tab) // We should not have tables > 0 and join_tab != NULL
    for (uint i= 0; i < tables; i++)
    {
      JOIN_TAB *const tab= join_tab + i;

      // Sort buffers must already have been released by cleanup():
      DBUG_ASSERT(!tab->table || !tab->table->sort.record_pointers);
      if (tab->op)
      {
        if (tab->op->type() == QEP_operation::OT_TMP_TABLE)
        {
          if (tab->table) // Check tmp table is not yet freed.
            free_tmp_table(thd, tab->table);
          delete tab->tmp_table_param;
          tab->tmp_table_param= NULL;
        }
        tab->op->free();
        tab->op= NULL;
      }

      tab->table= NULL;
    }
  /* Cleanup items referencing temporary table columns */
  cleanup_item_list(tmp_all_fields1);
  cleanup_item_list(tmp_all_fields3);
  destroy_sj_tmp_tables(this);

  // Delete all semi-join materialization descriptors.
  List_iterator<Semijoin_mat_exec> sjm_list_it(sjm_exec_list);
  Semijoin_mat_exec *sjm;
  while ((sjm= sjm_list_it++))
    delete sjm;
  sjm_exec_list.empty();

  keyuse.clear();
  DBUG_RETURN(MY_TEST(error));
}
944
945
cleanup_item_list(List<Item> & items) const946 void JOIN::cleanup_item_list(List<Item> &items) const
947 {
948 if (!items.is_empty())
949 {
950 List_iterator_fast<Item> it(items);
951 Item *item;
952 while ((item= it++))
953 item->cleanup();
954 }
955 }
956
957
958 /**
959 Prepare stage of mysql_select.
960
961 @param thd thread handler
962 the top-level select_lex for this query
963 @param tables list of all tables used in this query.
964 The tables have been pre-opened.
965 @param wild_num number of wildcards used in the top level
966 select of this query.
967 For example statement
968 SELECT *, t1.*, catalog.t2.* FROM t0, t1, t2;
969 has 3 wildcards.
970 @param fields list of items in SELECT list of the top-level
971 select
972 e.g. SELECT a, b, c FROM t1 will have Item_field
973 for a, b and c in this list.
974 @param conds top level item of an expression representing
975 WHERE clause of the top level select
976 @param og_num total number of ORDER BY and GROUP BY clauses
977 arguments
978 @param order linked list of ORDER BY agruments
979 @param group linked list of GROUP BY arguments
980 @param having top level item of HAVING expression
981 @param select_options select options (BIG_RESULT, etc)
982 @param result an instance of result set handling class.
                              This object is responsible for sending result
984 set rows to the client or inserting them
985 into a table.
986 @param unit top-level UNIT of this query
987 UNIT is an artificial object created by the
988 parser for every SELECT clause.
989 e.g.
990 SELECT * FROM t1 WHERE a1 IN (SELECT * FROM t2)
991 has 2 unions.
992 @param select_lex the only SELECT_LEX of this query
993 @param[out] free_join Will be set to false if select_lex->join does
994 not need to be freed.
995
996 @retval
997 false success
998 @retval
999 true an error
1000
1001 @note tables must be opened before calling mysql_prepare_select.
1002 */
1003
1004 static bool
mysql_prepare_select(THD * thd,TABLE_LIST * tables,uint wild_num,List<Item> & fields,Item * conds,uint og_num,ORDER * order,ORDER * group,Item * having,ulonglong select_options,select_result * result,SELECT_LEX_UNIT * unit,SELECT_LEX * select_lex,bool * free_join)1005 mysql_prepare_select(THD *thd,
1006 TABLE_LIST *tables, uint wild_num, List<Item> &fields,
1007 Item *conds, uint og_num, ORDER *order, ORDER *group,
1008 Item *having, ulonglong select_options,
1009 select_result *result, SELECT_LEX_UNIT *unit,
1010 SELECT_LEX *select_lex, bool *free_join)
1011 {
1012 bool err= false;
1013 JOIN *join;
1014
1015 DBUG_ENTER("mysql_prepare_select");
1016 select_lex->context.resolve_in_select_list= TRUE;
1017 if (select_lex->join != 0)
1018 {
1019 join= select_lex->join;
1020 /*
1021 is it single SELECT in derived table, called in derived table
1022 creation
1023 */
1024 if (select_lex->linkage != DERIVED_TABLE_TYPE ||
1025 (select_options & SELECT_DESCRIBE))
1026 {
1027 if (select_lex->linkage != GLOBAL_OPTIONS_TYPE)
1028 {
1029 //here is EXPLAIN of subselect or derived table
1030 if (join->change_result(result))
1031 {
1032 DBUG_RETURN(TRUE);
1033 }
1034 /*
1035 Original join tabs might be overwritten at first
1036 subselect execution. So we need to restore them.
1037 */
1038 Item_subselect *subselect= select_lex->master_unit()->item;
1039 if (subselect && subselect->is_uncacheable())
1040 join->reset();
1041 }
1042 else
1043 {
1044 err= join->prepare(tables, wild_num,
1045 conds, og_num, order, group, having,
1046 select_lex, unit);
1047 if (err)
1048 DBUG_RETURN(true);
1049 }
1050 }
1051 *free_join= false;
1052 join->select_options= select_options;
1053 }
1054 else
1055 {
1056 if (!(join= new JOIN(thd, fields, select_options, result)))
1057 DBUG_RETURN(TRUE); /* purecov: inspected */
1058 THD_STAGE_INFO(thd, stage_init);
1059 thd->lex->used_tables=0; // Updated by setup_fields
1060 err= join->prepare(tables, wild_num,
1061 conds, og_num, order, group, having,
1062 select_lex, unit);
1063 if (err)
1064 DBUG_RETURN(true);
1065 }
1066
1067 DBUG_RETURN(err);
1068 }
1069
1070
1071 /**
1072 Execute stage of mysql_select.
1073
1074 @param thd thread handler
1075 @param select_lex the only SELECT_LEX of this query
1076 @param free_join if join should be freed
1077
1078 @return Operation status
1079 @retval false success
1080 @retval true an error
1081
1082 @note tables must be opened and locked before calling mysql_execute_select.
1083 */
1084
1085 static bool
mysql_execute_select(THD * thd,SELECT_LEX * select_lex,bool free_join)1086 mysql_execute_select(THD *thd, SELECT_LEX *select_lex, bool free_join)
1087 {
1088 bool err;
1089 JOIN* join= select_lex->join;
1090
1091 DBUG_ENTER("mysql_execute_select");
1092 DBUG_ASSERT(join);
1093
1094 if ((err= join->optimize()))
1095 {
1096 goto err; // 1
1097 }
1098
1099 if (thd->is_error())
1100 goto err;
1101
1102 if (join->select_options & SELECT_DESCRIBE)
1103 {
1104 join->explain();
1105 free_join= false;
1106 }
1107 else
1108 join->exec();
1109
1110 err:
1111 if (free_join)
1112 {
1113 THD_STAGE_INFO(thd, stage_end);
1114 err|= select_lex->cleanup();
1115 DBUG_RETURN(err || thd->is_error());
1116 }
1117 DBUG_RETURN(join->error);
1118 }
1119
1120
1121 /**
1122 An entry point to single-unit select (a select without UNION).
1123
1124 @param thd thread handler
1125 @param tables list of all tables used in this query.
1126 The tables have been pre-opened.
1127 @param wild_num number of wildcards used in the top level
1128 select of this query.
1129 For example statement
1130 SELECT *, t1.*, catalog.t2.* FROM t0, t1, t2;
1131 has 3 wildcards.
1132 @param fields list of items in SELECT list of the top-level
1133 select
1134 e.g. SELECT a, b, c FROM t1 will have Item_field
1135 for a, b and c in this list.
1136 @param conds top level item of an expression representing
1137 WHERE clause of the top level select
  @param order                linked list of ORDER BY arguments
1139 @param group linked list of GROUP BY arguments
1140 @param having top level item of HAVING expression
1141 @param select_options select options (BIG_RESULT, etc)
1142 @param result an instance of result set handling class.
                              This object is responsible for sending result
1144 set rows to the client or inserting them
1145 into a table.
1146 @param unit top-level UNIT of this query
1147 UNIT is an artificial object created by the
1148 parser for every SELECT clause.
1149 e.g.
1150 SELECT * FROM t1 WHERE a1 IN (SELECT * FROM t2)
1151 has 2 unions.
1152 @param select_lex the only SELECT_LEX of this query
1153
1154 @retval
1155 false success
1156 @retval
1157 true an error
1158 */
1159
1160 bool
mysql_select(THD * thd,TABLE_LIST * tables,uint wild_num,List<Item> & fields,Item * conds,SQL_I_List<ORDER> * order,SQL_I_List<ORDER> * group,Item * having,ulonglong select_options,select_result * result,SELECT_LEX_UNIT * unit,SELECT_LEX * select_lex)1161 mysql_select(THD *thd,
1162 TABLE_LIST *tables, uint wild_num, List<Item> &fields,
1163 Item *conds, SQL_I_List<ORDER> *order, SQL_I_List<ORDER> *group,
1164 Item *having, ulonglong select_options,
1165 select_result *result, SELECT_LEX_UNIT *unit,
1166 SELECT_LEX *select_lex)
1167 {
1168 bool free_join= true;
1169 uint og_num= 0;
1170 ORDER *first_order= NULL;
1171 ORDER *first_group= NULL;
1172 DBUG_ENTER("mysql_select");
1173
1174 if (order)
1175 {
1176 og_num= order->elements;
1177 first_order= order->first;
1178 }
1179 if (group)
1180 {
1181 og_num+= group->elements;
1182 first_group= group->first;
1183 }
1184
1185 if (mysql_prepare_select(thd, tables, wild_num, fields,
1186 conds, og_num, first_order, first_group, having,
1187 select_options, result, unit,
1188 select_lex, &free_join))
1189 {
1190 if (free_join)
1191 {
1192 THD_STAGE_INFO(thd, stage_end);
1193 (void) select_lex->cleanup();
1194 }
1195 DBUG_RETURN(true);
1196 }
1197
1198 if (! thd->lex->is_query_tables_locked())
1199 {
1200 /*
1201 If tables are not locked at this point, it means that we have delayed
1202 this step until after prepare stage (i.e. this moment). This allows to
1203 do better partition pruning and avoid locking unused partitions.
1204 As a consequence, in such a case, prepare stage can rely only on
1205 metadata about tables used and not data from them.
1206 We need to lock tables now in order to proceed with the remaning
1207 stages of query optimization and execution.
1208 */
1209 if (lock_tables(thd, thd->lex->query_tables, thd->lex->table_count, 0))
1210 {
1211 if (free_join)
1212 {
1213 THD_STAGE_INFO(thd, stage_end);
1214 (void) select_lex->cleanup();
1215 }
1216 DBUG_RETURN(true);
1217 }
1218
1219 /*
1220 Only register query in cache if it tables were locked above.
1221
1222 Tables must be locked before storing the query in the query cache.
1223 Transactional engines must been signalled that the statement started,
1224 which external_lock signals.
1225 */
1226 query_cache_store_query(thd, thd->lex->query_tables);
1227 }
1228
1229 DBUG_RETURN(mysql_execute_select(thd, select_lex, free_join));
1230 }
1231
1232 /*****************************************************************************
1233 Go through all combinations of not marked tables and find the one
1234 which uses least records
1235 *****************************************************************************/
1236
1237 /**
1238 Returns which join buffer algorithms are enabled for this session.
1239
1240 @param thd the @c THD for this session
1241
1242 @return bitmap with available join buffer algorithms
1243 */
1244
join_buffer_alg(const THD * thd)1245 static uint join_buffer_alg(const THD *thd)
1246 {
1247 uint alg= JOIN_CACHE::ALG_NONE;
1248
1249 if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_BNL))
1250 alg|= JOIN_CACHE::ALG_BNL;
1251
1252 if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_BKA))
1253 {
1254 bool use_bka_unique= false;
1255 DBUG_EXECUTE_IF("test_bka_unique", use_bka_unique= true;);
1256
1257 if (use_bka_unique)
1258 alg|= JOIN_CACHE::ALG_BKA_UNIQUE;
1259 else
1260 alg|= JOIN_CACHE::ALG_BKA;
1261 }
1262
1263 return alg;
1264 }
1265
1266
1267 /**
  Find how much space the previously-read non-const tables take in the cache.
1269 */
1270
calc_used_field_length(THD * thd,JOIN_TAB * join_tab)1271 void calc_used_field_length(THD *thd, JOIN_TAB *join_tab)
1272 {
1273 uint null_fields,blobs,fields,rec_length;
1274 Field **f_ptr,*field;
1275 uint uneven_bit_fields;
1276 MY_BITMAP *read_set= join_tab->table->read_set;
1277
1278 uneven_bit_fields= null_fields= blobs= fields= rec_length=0;
1279 for (f_ptr=join_tab->table->field ; (field= *f_ptr) ; f_ptr++)
1280 {
1281 if (bitmap_is_set(read_set, field->field_index))
1282 {
1283 uint flags=field->flags;
1284 fields++;
1285 rec_length+=field->pack_length();
1286 if (flags & BLOB_FLAG)
1287 blobs++;
1288 if (!(flags & NOT_NULL_FLAG))
1289 null_fields++;
1290 if (field->type() == MYSQL_TYPE_BIT &&
1291 ((Field_bit*)field)->bit_len)
1292 uneven_bit_fields++;
1293 }
1294 }
1295 if (null_fields || uneven_bit_fields)
1296 rec_length+=(join_tab->table->s->null_fields+7)/8;
1297 if (join_tab->table->maybe_null)
1298 rec_length+=sizeof(my_bool);
1299 if (blobs)
1300 {
1301 uint blob_length=(uint) (join_tab->table->file->stats.mean_rec_length-
1302 (join_tab->table->s->reclength- rec_length));
1303 rec_length+= max<uint>(4U, blob_length);
1304 }
1305 /**
1306 @todo why don't we count the rowids that we might need to store
1307 when using DuplicateElimination?
1308 */
1309 join_tab->used_fields=fields;
1310 join_tab->used_fieldlength=rec_length;
1311 join_tab->used_blobs=blobs;
1312 join_tab->used_null_fields= null_fields;
1313 join_tab->used_uneven_bit_fields= uneven_bit_fields;
1314 }
1315
1316
1317 /**
1318 Set up JOIN_TAB structs according to the picked join order in best_positions.
1319 This allocates execution structures so may be called only after we have the
1320 very final plan. It must be called after
1321 Optimize_table_order::fix_semijoin_strategies().
1322
1323 @return False if success, True if error
1324
1325 @details
1326 - create join->join_tab array and copy from existing JOIN_TABs in join order
1327 - create helper structs for materialized semi-join handling
1328 - finalize semi-join strategy choices
1329 - Number of intermediate tables "tmp_tables" is calculated.
1330 - "tables" and "primary_tables" are recalculated.
1331
1332 Notice that intermediate tables will not have a POSITION reference; and they
1333 will not have a TABLE reference before the final stages of code generation.
1334 */
1335
bool JOIN::get_best_combination()
{
  DBUG_ENTER("JOIN::get_best_combination");

  // At this point "tables" and "primary_tables" represent the same:
  DBUG_ASSERT(tables == primary_tables);

  /*
    Allocate additional space for tmp tables.
    Number of plan nodes:
      # of regular input tables (including semi-joined ones) +
      # of semi-join nests for materialization +
      1? +  // For GROUP BY
      1? +  // For DISTINCT
      1? +  // For aggregation functions aggregated in outer query
            // when used with distinct
      1? +  // For ORDER BY
      1?    // buffer result
    Up to 2 tmp tables are actually used, but it's hard to tell exact number
    at this stage.
  */
  uint tmp_tables= (group_list ? 1 : 0) +
                   (select_distinct ?
                    (tmp_table_param.outer_sum_func_count ? 2 : 1) : 0) +
                   (order ? 1 : 0) +
                   (select_options & (SELECT_BIG_RESULT | OPTION_BUFFER_RESULT) ? 1 : 0) ;
  if (tmp_tables > 2)
    tmp_tables= 2;

  /*
    Rearrange queries with materialized semi-join nests so that the semi-join
    nest is replaced with a reference to a materialized temporary table and all
    materialized subquery tables are placed after the intermediate tables.
    After the following loop, "inner_target" is the position of the first
    subquery table (if any). "outer_target" is the position of first outer
    table, and will later be used to track the position of any materialized
    temporary tables.
  */
  const bool has_semijoin= !select_lex->sj_nests.is_empty();
  uint outer_target= 0;
  uint inner_target= primary_tables + tmp_tables;
  uint sjm_nests= 0;

  if (has_semijoin)
  {
    // Count materialized SJ nests and shrink inner_target accordingly:
    // each nest collapses n_sj_tables inner tables into one plan node.
    for (uint tableno= 0; tableno < primary_tables; )
    {
      if (sj_is_materialize_strategy(best_positions[tableno].sj_strategy))
      {
        sjm_nests++;
        inner_target-= (best_positions[tableno].n_sj_tables - 1);
        tableno+= best_positions[tableno].n_sj_tables;
      }
      else
        tableno++;
    }
  }
  // One JOIN_TAB per input table, per SJ-materialization node, per tmp table.
  if (!(join_tab= new(thd->mem_root) JOIN_TAB[tables + sjm_nests + tmp_tables]))
    DBUG_RETURN(true);

  int sjm_index= tables;  // Number assigned to materialized temporary table
  int remaining_sjm_inner= 0;
  for (uint tableno= 0; tableno < tables; tableno++)
  {
    if (has_semijoin &&
        sj_is_materialize_strategy(best_positions[tableno].sj_strategy))
    {
      DBUG_ASSERT(outer_target < inner_target);

      POSITION *const pos_table= best_positions + tableno;
      TABLE_LIST *const sj_nest= pos_table->table->emb_sj_nest;

      // Handle this many inner tables of materialized semi-join
      remaining_sjm_inner= pos_table->n_sj_tables;

      // Execution-time descriptor for this materialized semi-join nest.
      Semijoin_mat_exec *const sjm_exec=
        new (thd->mem_root)
        Semijoin_mat_exec(sj_nest,
                          (pos_table->sj_strategy == SJ_OPT_MATERIALIZE_SCAN),
                          remaining_sjm_inner, outer_target, inner_target);
      if (!sjm_exec)
        DBUG_RETURN(true);

      (join_tab + outer_target)->sj_mat_exec= sjm_exec;

      // Create the JOIN_TAB / POSITION for the materialized tmp table itself.
      if (setup_materialized_table(join_tab + outer_target, sjm_index,
                                   pos_table, best_positions + sjm_index))
        DBUG_RETURN(true);

      map2table[sjm_exec->table->tablenr]= join_tab + outer_target;

      outer_target++;
      sjm_index++;
    }
    /*
      Locate join_tab target for the table we are considering.
      (remaining_sjm_inner becomes negative for non-SJM tables, this can be
       safely ignored).
    */
    const uint target=
      (remaining_sjm_inner--) > 0 ? inner_target++ : outer_target++;
    JOIN_TAB *const tab= join_tab + target;

    // Copy data from existing join_tab
    *tab= *best_positions[tableno].table;

    tab->position= best_positions + tableno;

    TABLE *const table= tab->table;
    table->reginfo.join_tab= tab;
    if (!*tab->on_expr_ref)
      table->reginfo.not_exists_optimize= false;     // Only with LEFT JOIN
    map2table[table->tablenr]= tab;
  }

  // Count the materialized semi-join tables as regular input tables
  tables+= sjm_nests + tmp_tables;
  // Set the number of non-materialized tables:
  primary_tables= outer_target;

  if (has_semijoin)
  {
    set_semijoin_info();

    // Update equalities and keyuses after having added SJ materialization
    if (update_equalities_for_sjm())
      DBUG_RETURN(true);
  }
  // sjm is no longer needed, trash it. To reuse it, reset its members!
  List_iterator<TABLE_LIST> sj_list_it(select_lex->sj_nests);
  TABLE_LIST *sj_nest;
  while ((sj_nest= sj_list_it++))
    TRASH(static_cast<void*>(&sj_nest->nested_join->sjm),
          sizeof(sj_nest->nested_join->sjm));

  DBUG_RETURN(false);
}
1473
1474
1475 /**
1476 Set access methods for the tables of a query plan.
1477
1478 @return False if success, True if error
1479
1480 @details
1481 We need to fill in data for the case where
1482 - There is no key selected (use JT_ALL)
1483 - Loose scan semi-join strategy is selected (use JT_ALL)
1484 - A ref key can be used (use JT_REF, JT_REF_OR_NULL, JT_EQ_REF or JT_FT)
1485
1486 @note We cannot setup fields used for ref access before we have sorted
1487 the items within multiple equalities according to the final order of
1488 the tables involved in the join operation. Currently, this occurs in
1489 @see substitute_for_best_equal_field().
1490 */
set_access_methods()1491 bool JOIN::set_access_methods()
1492 {
1493 DBUG_ENTER("JOIN::set_access_methods");
1494
1495 full_join= false;
1496
1497 for (uint tableno= const_tables; tableno < tables; tableno++)
1498 {
1499 JOIN_TAB *const tab= join_tab + tableno;
1500
1501 if (!tab->position)
1502 continue;
1503
1504 DBUG_PRINT("info",("type: %d", tab->type));
1505
1506 // Set preliminary join cache setting based on decision from greedy search
1507 tab->use_join_cache= tab->position->use_join_buffer ?
1508 JOIN_CACHE::ALG_BNL : JOIN_CACHE::ALG_NONE;
1509
1510 if (tab->type == JT_CONST || tab->type == JT_SYSTEM)
1511 continue; // Handled in make_join_statistics()
1512
1513 Key_use *const keyuse= tab->position->key;
1514 if (tab->position->sj_strategy == SJ_OPT_LOOSE_SCAN)
1515 {
1516 DBUG_ASSERT(tab->keys.is_set(tab->position->loosescan_key));
1517 tab->type= JT_ALL; // @todo is this consistent for a LooseScan table ?
1518 tab->index= tab->position->loosescan_key;
1519 }
1520 else if (!keyuse)
1521 {
1522 tab->type= JT_ALL;
1523 if (tableno > const_tables)
1524 full_join= true;
1525 }
1526 else
1527 {
1528 if (create_ref_for_key(this, tab, keyuse, tab->prefix_tables()))
1529 DBUG_RETURN(true);
1530 }
1531 }
1532
1533 DBUG_RETURN(false);
1534 }
1535
1536
1537 /**
1538 Set the first_sj_inner_tab and last_sj_inner_tab fields for all tables
1539 inside the semijoin nests of the query.
1540 */
set_semijoin_info()1541 void JOIN::set_semijoin_info()
1542 {
1543 if (select_lex->sj_nests.is_empty())
1544 return;
1545
1546 for (uint tableno= const_tables; tableno < tables; )
1547 {
1548 JOIN_TAB *const tab= join_tab + tableno;
1549 const POSITION *const pos= tab->position;
1550
1551 if (!pos)
1552 {
1553 tableno++;
1554 continue;
1555 }
1556 switch (pos->sj_strategy)
1557 {
1558 case SJ_OPT_NONE:
1559 tableno++;
1560 break;
1561 case SJ_OPT_MATERIALIZE_LOOKUP:
1562 case SJ_OPT_MATERIALIZE_SCAN:
1563 case SJ_OPT_LOOSE_SCAN:
1564 case SJ_OPT_DUPS_WEEDOUT:
1565 case SJ_OPT_FIRST_MATCH:
1566 /*
1567 Remember the first and last semijoin inner tables; this serves to tell
1568 a JOIN_TAB's semijoin strategy (like in setup_join_buffering()).
1569 */
1570 JOIN_TAB *last_sj_tab= tab + pos->n_sj_tables - 1;
1571 JOIN_TAB *last_sj_inner=
1572 (pos->sj_strategy == SJ_OPT_DUPS_WEEDOUT) ?
1573 /* Range may end with non-inner table so cannot set last_sj_inner_tab */
1574 NULL : last_sj_tab;
1575 for (JOIN_TAB *tab_in_range= tab;
1576 tab_in_range <= last_sj_tab;
1577 tab_in_range++)
1578 {
1579 tab_in_range->first_sj_inner_tab= tab;
1580 tab_in_range->last_sj_inner_tab= last_sj_inner;
1581 }
1582 tableno+= pos->n_sj_tables;
1583 break;
1584 }
1585 }
1586 }
1587
1588
1589 /**
1590 Setup a ref access for looking up rows via an index (a key).
1591
1592 @param join The join object being handled
1593 @param j The join_tab which will have the ref access populated
1594 @param first_keyuse First key part of (possibly multi-part) key
1595 @param used_tables Bitmap of available tables
1596
1597 @return False if success, True if error
1598
1599 @details
1600 This function will set up a ref access using the best key found
1601 during access path analysis and cost analysis.
1602
1603 @note We cannot setup fields used for ref access before we have sorted
1604 the items within multiple equalities according to the final order of
1605 the tables involved in the join operation. Currently, this occurs in
1606 @see substitute_for_best_equal_field().
1607 The exception is ref access for const tables, which are fixed
1608 before the greedy search planner is invoked.
1609 */
1610
bool create_ref_for_key(JOIN *join, JOIN_TAB *j, Key_use *org_keyuse,
                        table_map used_tables)
{
  DBUG_ENTER("create_ref_for_key");

  Key_use *keyuse= org_keyuse;
  const uint key= keyuse->key;
  // Full-text "keyparts" are flagged with the special FT_KEYPART value.
  const bool ftkey= (keyuse->keypart == FT_KEYPART);
  THD *const thd= join->thd;
  uint keyparts, length;
  TABLE *const table= j->table;
  KEY *const keyinfo= table->key_info+key;
  // Key_use elements selected below, indexed by key part number.
  Key_use *chosen_keyuses[MAX_REF_PARTS];

  DBUG_ASSERT(j->keys.is_set(org_keyuse->key));

  if (ftkey)
  {
    // Full-text lookup: exactly one pseudo key part, no key buffer needed.
    Item_func_match *ifm=(Item_func_match *)keyuse->val;

    length=0;
    keyparts=1;
    ifm->join_key=1;
  }
  else
  {
    keyparts=length=0;
    uint found_part_ref_or_null= 0;
    // Calculate length for the used key. Remember chosen Key_use-s.
    do
    {
      /*
        This Key_use is chosen if:
        - it involves a key part at the right place (if index is (a,b) we
          can have a search criterion on 'b' only if we also have a criterion
          on 'a'),
        - it references only tables earlier in the plan.
        Moreover, the execution layer is limited to maximum one ref_or_null
        keypart, as TABLE_REF::null_ref_key is only one byte.
      */
      if (!(~used_tables & keyuse->used_tables) &&
          keyparts == keyuse->keypart &&
          !(found_part_ref_or_null & keyuse->optimize))
      {
        DBUG_ASSERT(keyparts <= MAX_REF_PARTS);
        chosen_keyuses[keyparts]= keyuse;
        keyparts++;
        length+= keyinfo->key_part[keyuse->keypart].store_length;
        found_part_ref_or_null|= keyuse->optimize;
      }
      keyuse++;
      // Key_use array is sorted by (table, key), so stop at the first
      // element belonging to a different table or index.
    } while (keyuse->table == table && keyuse->key == key);
    DBUG_ASSERT(length > 0 && keyparts != 0);
  } /* not ftkey */

  DBUG_ASSERT(keyparts > 0);

  /* set up fieldref */
  j->ref.key_parts=keyparts;
  j->ref.key_length=length;
  j->ref.key=(int) key;
  // key_buff holds two key images (key_buff and key_buff2), hence "*2".
  if (!(j->ref.key_buff= (uchar*) thd->calloc(ALIGN_SIZE(length)*2)) ||
      !(j->ref.key_copy= (store_key**) thd->alloc((sizeof(store_key*) *
                                                   (keyparts)))) ||
      !(j->ref.items= (Item**) thd->alloc(sizeof(Item*)*keyparts)) ||
      !(j->ref.cond_guards= (bool**) thd->alloc(sizeof(uint*)*keyparts)))
  {
    DBUG_RETURN(TRUE);
  }
  j->ref.key_buff2=j->ref.key_buff+ALIGN_SIZE(length);
  j->ref.key_err=1;
  j->ref.has_record= FALSE;
  j->ref.null_rejecting= 0;
  j->ref.use_count= 0;
  j->ref.disable_cache= FALSE;
  keyuse=org_keyuse;

  uchar *key_buff= j->ref.key_buff;
  uchar *null_ref_key= NULL;
  bool keyuse_uses_no_tables= true;
  if (ftkey)
  {
    j->ref.items[0]=((Item_func*)(keyuse->val))->key_item();
    /* Predicates pushed down into subquery can't be used FT access */
    j->ref.cond_guards[0]= NULL;
    if (keyuse->used_tables)
      DBUG_RETURN(TRUE);                        // not supported yet. SerG

    j->type=JT_FT;
    memset(j->ref.key_copy, 0, sizeof(j->ref.key_copy[0]) * keyparts);
  }
  else
  {
    // Set up TABLE_REF based on chosen Key_use-s.
    for (uint part_no= 0 ; part_no < keyparts ; part_no++)
    {
      keyuse= chosen_keyuses[part_no];
      uint maybe_null= MY_TEST(keyinfo->key_part[part_no].null_bit);

      if (keyuse->val->type() == Item::FIELD_ITEM)
      {
        // Look up the most appropriate field to base the ref access on.
        keyuse->val= get_best_field(static_cast<Item_field *>(keyuse->val),
                                    join->cond_equal);
        keyuse->used_tables= keyuse->val->used_tables();
      }
      j->ref.items[part_no]=keyuse->val;        // Save for cond removal
      j->ref.cond_guards[part_no]= keyuse->cond_guard;
      if (keyuse->null_rejecting)
        j->ref.null_rejecting|= (key_part_map)1 << part_no;
      keyuse_uses_no_tables= keyuse_uses_no_tables && !keyuse->used_tables;

      // Build the store_key that copies this part's value into key_buff.
      store_key* key= get_store_key(thd,
                                    keyuse,join->const_table_map,
                                    &keyinfo->key_part[part_no],
                                    key_buff, maybe_null);
      if (unlikely(!key || thd->is_fatal_error))
        DBUG_RETURN(TRUE);

      if (keyuse->used_tables || thd->lex->describe)
        /*
          Comparing against a non-constant or executing an EXPLAIN
          query (which refers to this info when printing the 'ref'
          column of the query plan)
        */
        j->ref.key_copy[part_no]= key;
      else
      {
        /*
          key is const, copy value now and possibly skip it while ::exec().

          Note:
            Result check of store_key::copy() is unnecessary,
            it could be an error returned by store_key::copy() method
            but stored value is not null and default value could be used
            in this case. Methods which used for storing the value
            should be responsible for proper null value setting
            in case of an error. Thus it's enough to check key->null_key
            value only.
        */
        (void) key->copy();
        /*
          It should be reevaluated in ::exec() if
          constant evaluated to NULL value which we might need to
          handle as a special case during JOIN::exec()
          (As in : 'Full scan on NULL key')
        */
        if (key->null_key)
          j->ref.key_copy[part_no]= key;        // Reevaluate in JOIN::exec()
        else
          j->ref.key_copy[part_no]= NULL;
      }
      /*
        Remember if we are going to use REF_OR_NULL
        But only if field _really_ can be null i.e. we force JT_REF
        instead of JT_REF_OR_NULL in case if field can't be null
      */
      if ((keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL) && maybe_null)
      {
        DBUG_ASSERT(null_ref_key == NULL);      // or we would overwrite it below
        null_ref_key= key_buff;
      }
      key_buff+=keyinfo->key_part[part_no].store_length;
    }
  } /* not ftkey */
  // Finally derive the access type from what was collected above.
  if (j->type == JT_FT)
    DBUG_RETURN(false);
  if (j->type == JT_CONST)
    j->table->const_table= 1;
  else if (((actual_key_flags(keyinfo) &
             (HA_NOSAME | HA_NULL_PART_KEY)) != HA_NOSAME) ||
           keyparts != actual_key_parts(keyinfo) || null_ref_key)
  {
    /* Must read with repeat */
    j->type= null_ref_key ? JT_REF_OR_NULL : JT_REF;
    j->ref.null_ref_key= null_ref_key;
  }
  else if (keyuse_uses_no_tables &&
           !(table->file->ha_table_flags() & HA_BLOCK_CONST_TABLE))
  {
    /*
      This happen if we are using a constant expression in the ON part
      of an LEFT JOIN.
      SELECT * FROM a LEFT JOIN b ON b.key=30
      Here we should not mark the table as a 'const' as a field may
      have a 'normal' value or a NULL value.
    */
    j->type=JT_CONST;
  }
  else
    j->type=JT_EQ_REF;
  DBUG_RETURN(false);
}
1804
1805
1806
1807 static store_key *
get_store_key(THD * thd,Key_use * keyuse,table_map used_tables,KEY_PART_INFO * key_part,uchar * key_buff,uint maybe_null)1808 get_store_key(THD *thd, Key_use *keyuse, table_map used_tables,
1809 KEY_PART_INFO *key_part, uchar *key_buff, uint maybe_null)
1810 {
1811 if (!((~used_tables) & keyuse->used_tables)) // if const item
1812 {
1813 return new store_key_const_item(thd,
1814 key_part->field,
1815 key_buff + maybe_null,
1816 maybe_null ? key_buff : 0,
1817 key_part->length,
1818 keyuse->val);
1819 }
1820
1821 Item_field *field_item= NULL;
1822 if (keyuse->val->type() == Item::FIELD_ITEM)
1823 field_item= static_cast<Item_field*>(keyuse->val->real_item());
1824 else if (keyuse->val->type() == Item::REF_ITEM)
1825 {
1826 Item_ref *item_ref= static_cast<Item_ref*>(keyuse->val);
1827 if (item_ref->ref_type() == Item_ref::OUTER_REF)
1828 {
1829 if ((*item_ref->ref)->type() == Item::FIELD_ITEM)
1830 field_item= static_cast<Item_field*>(item_ref->real_item());
1831 else if ((*(Item_ref**)(item_ref)->ref)->ref_type()
1832 == Item_ref::DIRECT_REF
1833 &&
1834 item_ref->real_item()->type() == Item::FIELD_ITEM)
1835 field_item= static_cast<Item_field*>(item_ref->real_item());
1836 }
1837 }
1838 if (field_item)
1839 return new store_key_field(thd,
1840 key_part->field,
1841 key_buff + maybe_null,
1842 maybe_null ? key_buff : 0,
1843 key_part->length,
1844 field_item->field,
1845 keyuse->val->full_name());
1846
1847 return new store_key_item(thd,
1848 key_part->field,
1849 key_buff + maybe_null,
1850 maybe_null ? key_buff : 0,
1851 key_part->length,
1852 keyuse->val);
1853 }
1854
1855
1856 /**
1857 Extend e1 by AND'ing e2 to the condition e1 points to. The resulting
1858 condition is fixed. Requirement: the input Items must already have
1859 been fixed.
1860
1861 @param[in,out] e1 Pointer to condition that will be extended with e2
1862 @param e2 Condition that will extend e1
1863
1864 @retval true if there was a memory allocation error, in which case
1865 e1 remains unchanged
1866 @retval false otherwise
1867 */
1868
and_conditions(Item ** e1,Item * e2)1869 bool and_conditions(Item **e1, Item *e2)
1870 {
1871 DBUG_ASSERT(!(*e1) || (*e1)->fixed);
1872 DBUG_ASSERT(!e2 || e2->fixed);
1873 if (*e1)
1874 {
1875 if (!e2)
1876 return false;
1877 Item *res= new Item_cond_and(*e1, e2);
1878 if (unlikely(!res))
1879 return true;
1880
1881 *e1= res;
1882 res->quick_fix_field();
1883 res->update_used_tables();
1884
1885 }
1886 else
1887 *e1= e2;
1888 return false;
1889 }
1890
1891
1892 #define ICP_COND_USES_INDEX_ONLY 10
1893
1894 /*
1895 Get a part of the condition that can be checked using only index fields
1896
1897 SYNOPSIS
1898 make_cond_for_index()
1899 cond The source condition
1900 table The table that is partially available
1901 keyno The index in the above table. Only fields covered by the index
1902 are available
1903 other_tbls_ok TRUE <=> Fields of other non-const tables are allowed
1904
1905 DESCRIPTION
1906 Get a part of the condition that can be checked when for the given table
1907 we have values only of fields covered by some index. The condition may
1908 refer to other tables, it is assumed that we have values of all of their
1909 fields.
1910
1911 Example:
1912 make_cond_for_index(
1913 "cond(t1.field) AND cond(t2.key1) AND cond(t2.non_key) AND cond(t2.key2)",
1914 t2, keyno(t2.key1))
1915 will return
1916 "cond(t1.field) AND cond(t2.key2)"
1917
1918 RETURN
1919 Index condition, or NULL if no condition could be inferred.
1920 */
1921
static Item *make_cond_for_index(Item *cond, TABLE *table, uint keyno,
                                 bool other_tbls_ok)
{
  DBUG_ASSERT(cond != NULL);

  if (cond->type() == Item::COND_ITEM)
  {
    // Number of child conditions already marked as fully index-covered.
    uint n_marked= 0;
    if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
    {
      // AND: keep the subset of conjuncts that are checkable from the index.
      table_map used_tables= 0;
      Item_cond_and *new_cond=new Item_cond_and;
      if (!new_cond)
        return NULL;
      List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
      Item *item;
      while ((item=li++))
      {
        Item *fix= make_cond_for_index(item, table, keyno, other_tbls_ok);
        if (fix)
        {
          new_cond->argument_list()->push_back(fix);
          used_tables|= fix->used_tables();
        }
        n_marked += MY_TEST(item->marker == ICP_COND_USES_INDEX_ONLY);
      }
      // If every conjunct is index-only, the whole AND is index-only too.
      if (n_marked ==((Item_cond*)cond)->argument_list()->elements)
        cond->marker= ICP_COND_USES_INDEX_ONLY;
      switch (new_cond->argument_list()->elements) {
      case 0:
        return NULL;                    // No conjunct survived
      case 1:
        new_cond->set_used_tables(used_tables);
        return new_cond->argument_list()->head();  // Unwrap single conjunct
      default:
        new_cond->quick_fix_field();
        new_cond->set_used_tables(used_tables);
        return new_cond;
      }
    }
    else /* It's OR */
    {
      // OR: usable only if EVERY disjunct is checkable from the index.
      Item_cond_or *new_cond=new Item_cond_or;
      if (!new_cond)
        return NULL;
      List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
      Item *item;
      while ((item=li++))
      {
        Item *fix= make_cond_for_index(item, table, keyno, other_tbls_ok);
        if (!fix)
          return NULL;                  // One disjunct fails => whole OR fails
        new_cond->argument_list()->push_back(fix);
        n_marked += MY_TEST(item->marker == ICP_COND_USES_INDEX_ONLY);
      }
      if (n_marked ==((Item_cond*)cond)->argument_list()->elements)
        cond->marker= ICP_COND_USES_INDEX_ONLY;
      new_cond->quick_fix_field();
      new_cond->set_used_tables(cond->used_tables());
      new_cond->top_level_item();
      return new_cond;
    }
  }

  // Leaf condition: keep it only if all fields it reads are in the index.
  if (!uses_index_fields_only(cond, table, keyno, other_tbls_ok))
  {
    /*
      Reset marker since it might have the value
      ICP_COND_USES_INDEX_ONLY if this condition is part of the select
      condition for multiple tables.
    */
    cond->marker= 0;
    return NULL;
  }
  cond->marker= ICP_COND_USES_INDEX_ONLY;
  return cond;
}
1999
2000
make_cond_remainder(Item * cond,bool exclude_index)2001 static Item *make_cond_remainder(Item *cond, bool exclude_index)
2002 {
2003 if (exclude_index && cond->marker == ICP_COND_USES_INDEX_ONLY)
2004 return 0; /* Already checked */
2005
2006 if (cond->type() == Item::COND_ITEM)
2007 {
2008 table_map tbl_map= 0;
2009 if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
2010 {
2011 /* Create new top level AND item */
2012 Item_cond_and *new_cond=new Item_cond_and;
2013 if (!new_cond)
2014 return (Item*) 0;
2015 List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
2016 Item *item;
2017 while ((item=li++))
2018 {
2019 Item *fix= make_cond_remainder(item, exclude_index);
2020 if (fix)
2021 {
2022 new_cond->argument_list()->push_back(fix);
2023 tbl_map |= fix->used_tables();
2024 }
2025 }
2026 switch (new_cond->argument_list()->elements) {
2027 case 0:
2028 return (Item*) 0;
2029 case 1:
2030 return new_cond->argument_list()->head();
2031 default:
2032 new_cond->quick_fix_field();
2033 new_cond->set_used_tables(tbl_map);
2034 return new_cond;
2035 }
2036 }
2037 else /* It's OR */
2038 {
2039 Item_cond_or *new_cond=new Item_cond_or;
2040 if (!new_cond)
2041 return (Item*) 0;
2042 List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
2043 Item *item;
2044 while ((item=li++))
2045 {
2046 Item *fix= make_cond_remainder(item, FALSE);
2047 if (!fix)
2048 return (Item*) 0;
2049 new_cond->argument_list()->push_back(fix);
2050 tbl_map |= fix->used_tables();
2051 }
2052 new_cond->quick_fix_field();
2053 new_cond->set_used_tables(tbl_map);
2054 new_cond->top_level_item();
2055 return new_cond;
2056 }
2057 }
2058 return cond;
2059 }
2060
2061
2062 /**
2063 Try to extract and push the index condition down to table handler
2064
2065 @param tab A join tab that has tab->table->file and its
2066 condition in tab->m_condition
2067 @param keyno Index for which extract and push the condition
2068 @param other_tbls_ok TRUE <=> Fields of other non-const tables are allowed
2069 @param trace_obj trace object where information is to be added
2070 */
2071
static void push_index_cond(JOIN_TAB *tab, uint keyno, bool other_tbls_ok,
                            Opt_trace_object *trace_obj)
{
  DBUG_ENTER("push_index_cond");

  /*
    We will only attempt to push down an index condition when the
    following criteria are true:
    0. The table has a select condition
    1. The storage engine supports ICP.
    2. The system variable for enabling ICP is ON.
    3. The query is not a multi-table update or delete statement. The reason
       for this requirement is that the same handler will be used
       both for doing the select/join and the update. The pushed index
       condition might then also be applied by the storage engine
       when doing the update part and result in either not finding
       the record to update or updating the wrong record.
    4. The JOIN_TAB is not part of a subquery that has guarded conditions
       that can be turned on or off during execution of a 'Full scan on NULL
       key'.
       @see Item_in_optimizer::val_int()
       @see subselect_single_select_engine::exec()
       @see TABLE_REF::cond_guards
       @see setup_join_buffering
    5. The join type is not CONST or SYSTEM. The reason for excluding
       these join types, is that these are optimized to only read the
       record once from the storage engine and later re-use it. In a
       join where a pushed index condition evaluates fields from
       tables earlier in the join sequence, the pushed condition would
       only be evaluated the first time the record value was needed.
    6. The index is not a clustered index. The performance improvement
       of pushing an index condition on a clustered key is much lower
       than on a non-clustered key. This restriction should be
       re-evaluated when WL#6061 is implemented.
  */
  if (tab->condition() &&
      tab->table->file->index_flags(keyno, 0, 1) &
      HA_DO_INDEX_COND_PUSHDOWN &&
      tab->join->thd->optimizer_switch_flag(OPTIMIZER_SWITCH_INDEX_CONDITION_PUSHDOWN) &&
      tab->join->thd->lex->sql_command != SQLCOM_UPDATE_MULTI &&
      tab->join->thd->lex->sql_command != SQLCOM_DELETE_MULTI &&
      !tab->has_guarded_conds() &&
      tab->type != JT_CONST && tab->type != JT_SYSTEM &&
      !(keyno == tab->table->s->primary_key &&
        tab->table->file->primary_key_is_clustered()))
  {
    DBUG_EXECUTE("where", print_where(tab->condition(), "full cond",
                 QT_ORDINARY););
    /* Split out the part of the condition that the index alone can check. */
    Item *idx_cond= make_cond_for_index(tab->condition(), tab->table,
                                        keyno, other_tbls_ok);
    DBUG_EXECUTE("where", print_where(idx_cond, "idx cond", QT_ORDINARY););
    if (idx_cond)
    {
      /*
        Check that the condition to push actually contains fields from
        the index. Without any fields from the index it is unlikely
        that it will filter out any records since the conditions on
        fields from other tables in most cases have already been
        evaluated.
      */
      idx_cond->update_used_tables();
      if ((idx_cond->used_tables() & tab->table->map) == 0)
      {
        DBUG_VOID_RETURN;
      }

      Item *idx_remainder_cond= 0;
      /* Remember the full condition as it was before pushdown. */
      tab->pre_idx_push_cond= tab->condition();

      /*
        For BKA cache we store condition to special BKA cache field
        because evaluation of the condition requires additional operations
        before the evaluation. This condition is used in
        JOIN_CACHE_BKA[_UNIQUE]::skip_index_tuple() functions.
      */
      if (tab->use_join_cache &&
          /*
            if cache is used then the value is TRUE only
            for BKA[_UNIQUE] cache (see setup_join_buffering() func).
            In this case other_tbls_ok is an equivalent of
            cache->is_key_access().
          */
          other_tbls_ok &&
          (idx_cond->used_tables() &
           ~(tab->table->map | tab->join->const_table_map)))
      {
        tab->cache_idx_cond= idx_cond;
        trace_obj->add("pushed_to_BKA", true);
      }
      else
      {
        /* Hand the condition to the storage engine; it returns the part
           it cannot evaluate itself (or the whole condition on refusal). */
        idx_remainder_cond= tab->table->file->idx_cond_push(keyno, idx_cond);
        tab->select->icp_cond= idx_cond;
        DBUG_EXECUTE("where",
                     print_where(tab->select->icp_cond, "icp cond",
                                 QT_ORDINARY););
      }
      /*
        Disable eq_ref's "lookup cache" if we've pushed down an index
        condition.
        TODO: This check happens to work on current ICP implementations, but
        there may exist a compliant implementation that will not work
        correctly with it. Sort this out when we stabilize the condition
        pushdown APIs.
      */
      if (idx_remainder_cond != idx_cond)
      {
        tab->ref.disable_cache= TRUE;
        trace_obj->add("pushed_index_condition", idx_cond);
      }

      /* The row-level condition: everything not covered by the pushed
         index condition. */
      Item *row_cond= make_cond_remainder(tab->condition(), TRUE);
      DBUG_EXECUTE("where", print_where(row_cond, "remainder cond",
                   QT_ORDINARY););

      if (row_cond)
      {
        if (!idx_remainder_cond)
          tab->set_condition(row_cond, __LINE__);
        else
        {
          /* Re-attach what the engine refused plus the row remainder. */
          and_conditions(&row_cond, idx_remainder_cond);
          tab->set_condition(row_cond, __LINE__);
        }
      }
      else
        tab->set_condition(idx_remainder_cond, __LINE__);
      trace_obj->add("table_condition_attached", tab->condition());
      if (tab->select)
      {
        DBUG_EXECUTE("where", print_where(tab->select->cond, "cond",
                     QT_ORDINARY););
        /* Keep the SQL_SELECT in sync with the JOIN_TAB condition. */
        tab->select->cond= tab->condition();
      }
    }
  }
  DBUG_VOID_RETURN;
}
2210
2211
2212 /*
2213 Deny usage of join buffer for the specified table
2214
2215 SYNOPSIS
2216 set_join_cache_denial()
2217 tab join table for which join buffer usage is to be denied
2218
2219 DESCRIPTION
2220 The function denies usage of join buffer when joining the table 'tab'.
2221 The table is marked as not employing any join buffer. If a join cache
2222 object has been already allocated for the table this object is destroyed.
2223
2224 RETURN
2225 none
2226 */
2227
2228 static
set_join_cache_denial(JOIN_TAB * join_tab)2229 void set_join_cache_denial(JOIN_TAB *join_tab)
2230 {
2231 if (join_tab->op)
2232 {
2233 join_tab->op->free();
2234 join_tab->op= 0;
2235 }
2236 if (join_tab->use_join_cache)
2237 {
2238 join_tab->use_join_cache= JOIN_CACHE::ALG_NONE;
2239 /*
2240 It could be only sub_select(). It could not be sub_seject_sjm because we
2241 don't do join buffering for the first table in sjm nest.
2242 */
2243 join_tab[-1].next_select= sub_select;
2244 }
2245 }
2246
2247
2248 /*
2249 Revise usage of join buffer for the specified table and the whole nest
2250
2251 SYNOPSIS
2252 revise_cache_usage()
2253 tab join table for which join buffer usage is to be revised
2254
2255 DESCRIPTION
    The function revises the decision to use a join buffer for the table 'tab'.
    If this table happened to be among the inner tables of a nested outer join/
    semi-join, the function denies usage of join buffers for all of them.
2259
2260 RETURN
2261 none
2262 */
2263
2264 static
revise_cache_usage(JOIN_TAB * join_tab)2265 void revise_cache_usage(JOIN_TAB *join_tab)
2266 {
2267 JOIN_TAB *tab;
2268 JOIN_TAB *first_inner;
2269
2270 set_join_cache_denial(join_tab);
2271
2272 if (join_tab->first_inner)
2273 {
2274 JOIN_TAB *end_tab= join_tab;
2275 for (first_inner= join_tab->first_inner;
2276 first_inner;
2277 first_inner= first_inner->first_upper)
2278 {
2279 for (tab= end_tab-1; tab >= first_inner; tab--)
2280 set_join_cache_denial(tab);
2281 end_tab= first_inner;
2282 }
2283 }
2284 else if (join_tab->get_sj_strategy() == SJ_OPT_FIRST_MATCH)
2285 {
2286 first_inner= join_tab->first_sj_inner_tab;
2287 for (tab= join_tab-1; tab >= first_inner; tab--)
2288 {
2289 if (tab->first_sj_inner_tab == first_inner)
2290 set_join_cache_denial(tab);
2291 }
2292 }
2293 }
2294
2295
2296 /**
2297 Set up join buffering for a specified table, if possible.
2298
2299 @param tab joined table to check join buffer usage for
2300 @param join join for which the check is performed
2301 @param options options of the join
2302 @param no_jbuf_after don't use join buffering after table with this number
  @param[out] icp_other_tables_ok  TRUE if condition pushdown supports
                                   other tables presence
2305
2306 @return false if successful, true if error.
2307 Currently, allocation errors for join cache objects are ignored,
2308 and regular execution is chosen silently.
2309
2310 @details
2311 The function finds out whether the table 'tab' can be joined using a join
2312 buffer. This check is performed after the best execution plan for 'join'
2313 has been chosen. If the function decides that a join buffer can be employed
2314 then it selects the most appropriate join cache object that contains this
2315 join buffer.
2316 If it has already been decided to not use join buffering for this table,
2317 no action is taken.
2318
2319 Often it is already decided that join buffering will be used earlier in
2320 the optimization process, and this will also ensure that the most correct
2321 cost for the operation is calculated, and hence the probability of
2322 choosing an optimal join plan is higher. However, some join buffering
2323 decisions cannot currently be taken before this stage, hence we need this
2324 function to decide the most accurate join buffering strategy.
2325
2326 @todo Long-term it is the goal that join buffering strategy is decided
2327 when the plan is selected.
2328
  The result of the check and the type of the join buffer to be used
2330 depend on:
2331 - the access method to access rows of the joined table
2332 - whether the join table is an inner table of an outer join or semi-join
2333 - the optimizer_switch settings for join buffering
2334 - the join 'options'.
2335 In any case join buffer is not used if the number of the joined table is
2336 greater than 'no_jbuf_after'.
2337
2338 If block_nested_loop is turned on, and if all other criteria for using
2339 join buffering is fulfilled (see below), then join buffer is used
2340 for any join operation (inner join, outer join, semi-join) with 'JT_ALL'
2341 access method. In that case, a JOIN_CACHE_BNL object is always employed.
2342
2343 If an index is used to access rows of the joined table and batched_key_access
2344 is on, then a JOIN_CACHE_BKA object is employed. (Unless debug flag,
  test_bka_unique, is set, in which case a JOIN_CACHE_BKA_UNIQUE object is employed
2346 instead.)
2347
2348 If the function decides that a join buffer can be used to join the table
2349 'tab' then it sets @c tab->use_join_cache to reflect the chosen algorithm
2350 and assigns the selected join cache object to the field 'cache' of the
2351 previous join table. After creating a join cache object, it will be
2352 initialized. Failure to do so, will cause the decision to use join
2353 buffering to be reverted.
2354
2355 @note
2356 For a nested outer join/semi-join, currently, we either use join buffers for
2357 all inner tables or for none of them.
2358
2359 @todo
2360 Support BKA inside SJ-Materialization nests. When doing this, we'll need
2361 to only store sj-inner tables in the join buffer.
2362 #if 0
2363 JOIN_TAB *first_tab= join->join_tab+join->const_tables;
2364 uint n_tables= i-join->const_tables;
2365 / *
2366 We normally put all preceding tables into the join buffer, except
2367 for the constant tables.
2368 If we're inside a semi-join materialization nest, e.g.
2369
2370 outer_tbl1 outer_tbl2 ( inner_tbl1, inner_tbl2 ) ...
2371 ^-- we're here
2372
2373 then we need to put into the join buffer only the tables from
2374 within the nest.
2375 * /
2376 if (i >= first_sjm_table && i < last_sjm_table)
2377 {
2378 n_tables= i - first_sjm_table; // will be >0 if we got here
2379 first_tab= join->join_tab + first_sjm_table;
2380 }
2381 #endif
2382
2383 */
2384
static bool setup_join_buffering(JOIN_TAB *tab, JOIN *join,
                                 ulonglong options, uint no_jbuf_after,
                                 bool *icp_other_tables_ok)
{
  uint flags;                 // MRR flags from the BKA applicability probe
  Cost_estimate cost;
  ha_rows rows;
  uint bufsz= 4096;           // initial buffer size estimate for MRR probe
  JOIN_CACHE *prev_cache;
  const bool bnl_on= join->thd->optimizer_switch_flag(OPTIMIZER_SWITCH_BNL);
  const bool bka_on= join->thd->optimizer_switch_flag(OPTIMIZER_SWITCH_BKA);
  const uint tableno= tab - join->join_tab;
  const uint tab_sj_strategy= tab->get_sj_strategy();
  bool use_bka_unique= false;
  DBUG_EXECUTE_IF("test_bka_unique", use_bka_unique= true;);
  *icp_other_tables_ok= TRUE;

  /*
    Bail out immediately when join buffering is switched off entirely, or
    when this is the first non-const table (which never uses a buffer).
  */
  if (!(bnl_on || bka_on) || tableno == join->const_tables)
  {
    DBUG_ASSERT(tab->use_join_cache == JOIN_CACHE::ALG_NONE);
    return false;
  }
  if (options & SELECT_NO_JOIN_CACHE)
    goto no_join_cache;
  /*
    psergey-todo: why the below when execution code seems to handle the
    "range checked for each record" case?
  */
  if (tab->use_quick == QS_DYNAMIC_RANGE)
    goto no_join_cache;

  /* No join buffering if prevented by no_jbuf_after */
  if (tableno > no_jbuf_after)
    goto no_join_cache;

  /*
    An inner table of an outer join nest must not use join buffering if
    the first inner table of that outer join nest does not use join buffering.
    This condition is not handled by earlier optimizer stages.
  */
  if (tab->first_inner != NULL &&
      tab->first_inner != tab &&
      !tab->first_inner->use_join_cache)
    goto no_join_cache;
  /*
    The first inner table of an outer join nest must not use join buffering
    if the tables in the embedding outer join nest do not use join buffering.
    This condition is not handled by earlier optimizer stages.
  */
  if (tab->first_upper != NULL &&
      !tab->first_upper->use_join_cache)
    goto no_join_cache;

  /* Semi-join strategy specific restrictions. */
  switch (tab_sj_strategy)
  {
  case SJ_OPT_FIRST_MATCH:
    /*
      Use join cache with FirstMatch semi-join strategy only when semi-join
      contains only one table.
    */
    if (!tab->is_single_inner_of_semi_join())
    {
      DBUG_ASSERT(tab->use_join_cache == JOIN_CACHE::ALG_NONE);
      goto no_join_cache;
    }
    break;

  case SJ_OPT_LOOSE_SCAN:
    /* No join buffering if this semijoin nest is handled by loosescan */
    DBUG_ASSERT(tab->use_join_cache == JOIN_CACHE::ALG_NONE);
    goto no_join_cache;

  case SJ_OPT_MATERIALIZE_LOOKUP:
  case SJ_OPT_MATERIALIZE_SCAN:
    /*
      The Materialize strategies reuse the join_tab belonging to the
      first table that was materialized. Neither table can use join buffering:
      - The first table in a join never uses join buffering.
      - The join_tab used for looking up a row in the materialized table, or
        scanning the rows of a materialized table, cannot use join buffering.
      We allow join buffering for the remaining tables of the materialized
      semi-join nest.
    */
    if (tab->first_sj_inner_tab == tab)
    {
      DBUG_ASSERT(tab->use_join_cache == JOIN_CACHE::ALG_NONE);
      goto no_join_cache;
    }
    break;

  case SJ_OPT_DUPS_WEEDOUT:
    // This strategy allows the same join buffering as a regular join would.
  case SJ_OPT_NONE:
    break;
  }

  /*
    Link with the previous join cache, but make sure that we do not link
    join caches of two different operations when the previous operation was
    MaterializeLookup or MaterializeScan, ie if:
    1. the previous join_tab has join buffering enabled, and
    2. the previous join_tab belongs to a materialized semi-join nest, and
    3. this join_tab represents a regular table, or is part of a different
       semi-join interval than the previous join_tab.
  */
  prev_cache= (JOIN_CACHE*)(tab-1)->op;
  if (prev_cache != NULL &&                                       // 1
      sj_is_materialize_strategy((tab-1)->get_sj_strategy()) &&   // 2
      tab->first_sj_inner_tab != (tab-1)->first_sj_inner_tab)     // 3
    prev_cache= NULL;

  /*
    The following code prevents use of join buffering when there is an
    outer join operation and first match semi-join strategy is used, because:

    Outer join needs a "match flag" to track that a row should be
    NULL-complemented, such flag being attached to first inner table's cache
    (tracks whether the cached row from outer table got a match, in which case
    no NULL-complemented row is needed).

    FirstMatch also needs a "match flag", such flag is attached to sj inner
    table's cache (tracks whether the cached row from outer table already got
    a first match in the sj-inner table, in which case we don't need to join
    this cached row again)
    - but a row in a cache has only one "match flag"
    - so if "sj inner table"=="first inner", there is a problem.
  */
  if (tab_sj_strategy == SJ_OPT_FIRST_MATCH &&
      tab->is_inner_table_of_outer_join())
    goto no_join_cache;

  /* Pick the cache algorithm based on the access method of this table. */
  switch (tab->type) {
  case JT_ALL:
    if (!bnl_on)
    {
      DBUG_ASSERT(tab->use_join_cache == JOIN_CACHE::ALG_NONE);
      goto no_join_cache;
    }

    /* For EXPLAIN only the decision is recorded; no cache is allocated. */
    if ((options & SELECT_DESCRIBE) ||
        ((tab->op= new JOIN_CACHE_BNL(join, tab, prev_cache)) &&
         !tab->op->init()))
    {
      *icp_other_tables_ok= FALSE;
      DBUG_ASSERT(might_do_join_buffering(join_buffer_alg(join->thd), tab));
      tab->use_join_cache= JOIN_CACHE::ALG_BNL;
      return false;
    }
    goto no_join_cache;
  case JT_SYSTEM:
  case JT_CONST:
  case JT_REF:
  case JT_EQ_REF:
    if (!bka_on)
    {
      DBUG_ASSERT(tab->use_join_cache == JOIN_CACHE::ALG_NONE);
      goto no_join_cache;
    }

    /*
      Disable BKA for materializable derived tables/views as they aren't
      instantiated yet.
    */
    if (tab->table->pos_in_table_list->uses_materialization())
      goto no_join_cache;

    /*
      Can't use BKA for subquery if dealing with a subquery that can
      turn a ref access into a "full scan on NULL key" table scan.

      @see Item_in_optimizer::val_int()
      @see subselect_single_select_engine::exec()
      @see TABLE_REF::cond_guards
      @see push_index_cond()

      @todo: This choice to not use BKA should be done before making
      cost estimates, e.g. in set_join_buffer_properties(). That
      happens before cond guards are set up, so instead of doing the
      check below, BKA should be disabled if
      - We are in an IN subquery, and
      - The IN predicate is not a top_level_item, and
      - The left_expr of the IN predicate may contain NULL values
        (left_expr->maybe_null)
    */
    if (tab->has_guarded_conds())
      goto no_join_cache;

    /* Probe the handler's MRR implementation for BKA applicability. */
    flags= HA_MRR_NO_NULL_ENDPOINTS;
    if (tab->table->covering_keys.is_set(tab->ref.key))
      flags|= HA_MRR_INDEX_ONLY;
    rows= tab->table->file->multi_range_read_info(tab->ref.key, 10, 20,
                                                  &bufsz, &flags, &cost);
    /*
      Cannot use BKA/BKA_UNIQUE if
      1. MRR scan cannot be performed, or
      2. MRR default implementation is used
      Cannot use BKA if
      3. HA_MRR_NO_ASSOCIATION flag is set
    */
    if ((rows == HA_POS_ERROR) ||                               // 1
        (flags & HA_MRR_USE_DEFAULT_IMPL) ||                    // 2
        ((flags & HA_MRR_NO_ASSOCIATION) && !use_bka_unique))   // 3
      goto no_join_cache;

    if (!(options & SELECT_DESCRIBE))
    {
      if (use_bka_unique)
        tab->op= new JOIN_CACHE_BKA_UNIQUE(join, tab, flags, prev_cache);
      else
        tab->op= new JOIN_CACHE_BKA(join, tab, flags, prev_cache);

      /* Allocation or init failure: silently fall back to no buffering. */
      if (!tab->op || tab->op->init())
        goto no_join_cache;
    }

    DBUG_ASSERT(might_do_join_buffering(join_buffer_alg(join->thd), tab));
    if (use_bka_unique)
      tab->use_join_cache= JOIN_CACHE::ALG_BKA_UNIQUE;
    else
      tab->use_join_cache= JOIN_CACHE::ALG_BKA;

    return false;
  default : ;
  }

no_join_cache:
  /* Undo any earlier buffering decisions in the affected nest, if any. */
  if (bnl_on || bka_on)
    revise_cache_usage(tab);
  tab->use_join_cache= JOIN_CACHE::ALG_NONE;
  return false;
}
2616
2617
2618 /**
2619 Setup the materialized table for a semi-join nest
2620
2621 @param tab join_tab for the materialized semi-join table
2622 @param tableno table number of materialized table
2623 @param inner_pos information about the first inner table of the subquery
2624 @param sjm_pos information about the materialized semi-join table,
2625 to be filled with data.
2626
2627 @details
2628 Setup execution structures for one semi-join materialization nest:
2629 - Create the materialization temporary table, including TABLE_LIST object.
2630 - Create a list of Item_field objects per column in the temporary table.
2631 - Create a keyuse array describing index lookups into the table
2632 (for MaterializeLookup)
2633
2634 @return False if OK, True if error
2635 */
2636
setup_materialized_table(JOIN_TAB * tab,uint tableno,const POSITION * inner_pos,POSITION * sjm_pos)2637 bool JOIN::setup_materialized_table(JOIN_TAB *tab, uint tableno,
2638 const POSITION *inner_pos,
2639 POSITION *sjm_pos)
2640 {
2641 DBUG_ENTER("JOIN::setup_materialized_table");
2642 const TABLE_LIST *const emb_sj_nest= inner_pos->table->emb_sj_nest;
2643 Semijoin_mat_optimize *const sjm_opt= &emb_sj_nest->nested_join->sjm;
2644 Semijoin_mat_exec *const sjm_exec= tab->sj_mat_exec;
2645 const uint field_count= emb_sj_nest->nested_join->sj_inner_exprs.elements;
2646
2647 DBUG_ASSERT(inner_pos->sj_strategy == SJ_OPT_MATERIALIZE_LOOKUP ||
2648 inner_pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN);
2649
2650 /*
2651 Set up the table to write to, do as select_union::create_result_table does
2652 */
2653 sjm_exec->table_param.init();
2654 sjm_exec->table_param.field_count= field_count;
2655 sjm_exec->table_param.bit_fields_as_long= true;
2656
2657 char buffer[NAME_LEN];
2658 const size_t len= my_snprintf(buffer, sizeof(buffer) - 1, "<subquery%u>",
2659 emb_sj_nest->nested_join->query_block_id);
2660 char *name= (char *)alloc_root(thd->mem_root, len + 1);
2661 if (name == NULL)
2662 DBUG_RETURN(true); /* purecov: inspected */
2663
2664 memcpy(name, buffer, len);
2665 name[len] = '\0';
2666 TABLE *table;
2667 if (!(table= create_tmp_table(thd, &sjm_exec->table_param,
2668 emb_sj_nest->nested_join->sj_inner_exprs,
2669 NULL,
2670 true /* distinct */,
2671 true /* save_sum_fields */,
2672 thd->variables.option_bits |
2673 TMP_TABLE_ALL_COLUMNS,
2674 HA_POS_ERROR /* rows_limit */,
2675 name)))
2676 DBUG_RETURN(true); /* purecov: inspected */
2677 sjm_exec->table= table;
2678 table->tablenr= tableno;
2679 table->map= (table_map)1 << tableno;
2680 table->file->extra(HA_EXTRA_WRITE_CACHE);
2681 table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
2682 table->reginfo.join_tab= tab;
2683 sj_tmp_tables.push_back(table);
2684 sjm_exec_list.push_back(sjm_exec);
2685
2686 if (!(sjm_opt->mat_fields=
2687 (Item_field **) alloc_root(thd->mem_root,
2688 field_count * sizeof(Item_field **))))
2689 DBUG_RETURN(true);
2690
2691 for (uint fieldno= 0; fieldno < field_count; fieldno++)
2692 {
2693 if (!(sjm_opt->mat_fields[fieldno]= new Item_field(table->field[fieldno])))
2694 DBUG_RETURN(true);
2695 }
2696
2697 TABLE_LIST *tl;
2698 if (!(tl= (TABLE_LIST *) alloc_root(thd->mem_root, sizeof(TABLE_LIST))))
2699 DBUG_RETURN(true);
2700 // TODO: May have to setup outer-join info for this TABLE_LIST !!!
2701
2702 tl->init_one_table("", 0, name, strlen(name), name, TL_IGNORE);
2703
2704 tl->table= table;
2705
2706 tab->table= table;
2707 tab->position= sjm_pos;
2708 tab->join= this;
2709
2710 tab->worst_seeks= 1.0;
2711 tab->records= (ha_rows)emb_sj_nest->nested_join->sjm.expected_rowcount;
2712 tab->found_records= tab->records;
2713 tab->read_time= (ha_rows)emb_sj_nest->nested_join->sjm.scan_cost.total_cost();
2714
2715 tab->on_expr_ref= tl->join_cond_ref();
2716
2717 tab->materialize_table= join_materialize_semijoin;
2718
2719 table->pos_in_table_list= tl;
2720 table->keys_in_use_for_query.set_all();
2721 sjm_pos->table= tab;
2722 sjm_pos->sj_strategy= SJ_OPT_NONE;
2723
2724 sjm_pos->use_join_buffer= false;
2725
2726 /*
2727 Key_use objects are required so that create_ref_for_key() can set up
2728 a proper ref access for this table.
2729 */
2730 Key_use_array *keyuse=
2731 create_keyuse_for_table(thd, table, field_count, sjm_opt->mat_fields,
2732 emb_sj_nest->nested_join->sj_outer_exprs);
2733 if (!keyuse)
2734 DBUG_RETURN(true);
2735
2736 double fanout= (tab == join_tab + tab->join->const_tables) ?
2737 1.0 : (tab-1)->position->prefix_record_count;
2738 if (!sjm_exec->is_scan)
2739 {
2740 sjm_pos->key= keyuse->begin(); // MaterializeLookup will use the index
2741 tab->keyuse= keyuse->begin();
2742 tab->keys.set_bit(0); // There is one index - use it always
2743 tab->index= 0;
2744 sjm_pos->set_prefix_costs(1.0, fanout);
2745 sjm_pos->records_read= 1.0;
2746 sjm_pos->read_time= 1.0;
2747 }
2748 else
2749 {
2750 sjm_pos->key= NULL; // No index use for MaterializeScan
2751 sjm_pos->set_prefix_costs(tab->read_time, tab->records * fanout);
2752 sjm_pos->records_read= tab->records;
2753 sjm_pos->read_time= tab->read_time;
2754 }
2755
2756 DBUG_RETURN(false);
2757 }
2758
2759
2760 /**
2761 Plan refinement stage: do various setup things for the executor
2762
2763 @param join Join being processed
2764 @param options Join's options (checking for SELECT_DESCRIBE,
2765 SELECT_NO_JOIN_CACHE)
2766 @param no_jbuf_after Don't use join buffering after table with this number.
2767
2768 @return false if successful, true if error (Out of memory)
2769
2770 @details
2771 Plan refinement stage: do various set ups for the executioner
2772 - setup join buffering use
2773 - push index conditions
2774 - increment relevant counters
2775 - etc
2776 */
2777
2778 bool
make_join_readinfo(JOIN * join,ulonglong options,uint no_jbuf_after)2779 make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after)
2780 {
2781 const bool statistics= MY_TEST(!(join->select_options & SELECT_DESCRIBE));
2782
2783 DBUG_ENTER("make_join_readinfo");
2784
2785 Opt_trace_context * const trace= &join->thd->opt_trace;
2786 Opt_trace_object wrapper(trace);
2787 Opt_trace_array trace_refine_plan(trace, "refine_plan");
2788
2789 if (setup_semijoin_dups_elimination(join, options, no_jbuf_after))
2790 DBUG_RETURN(TRUE); /* purecov: inspected */
2791
2792 for (uint i= join->const_tables; i < join->tables; i++)
2793 {
2794 JOIN_TAB *const tab= join->join_tab+i;
2795 TABLE *const table= tab->table;
2796 if (!tab->position)
2797 continue;
2798
2799 bool icp_other_tables_ok;
2800 tab->read_record.table= table;
2801 tab->next_select=sub_select; /* normal select */
2802 tab->cache_idx_cond= 0;
2803 table->status= STATUS_GARBAGE | STATUS_NOT_FOUND;
2804 tab->read_first_record= NULL; // Access methods not set yet
2805 tab->read_record.read_record= NULL;
2806 tab->read_record.unlock_row= rr_unlock_row;
2807
2808 Opt_trace_object trace_refine_table(trace);
2809 trace_refine_table.add_utf8_table(table);
2810
2811 if (tab->do_loosescan())
2812 {
2813 if (!(tab->loosescan_buf= (uchar*)join->thd->alloc(tab->
2814 loosescan_key_len)))
2815 DBUG_RETURN(TRUE); /* purecov: inspected */
2816 }
2817 switch (tab->type) {
2818 case JT_EQ_REF:
2819 case JT_REF_OR_NULL:
2820 case JT_REF:
2821 if (tab->select)
2822 tab->select->set_quick(NULL);
2823 delete tab->quick;
2824 tab->quick=0;
2825 /* fall through */
2826 case JT_SYSTEM:
2827 case JT_CONST:
2828 /* Only happens with outer joins */
2829 if (setup_join_buffering(tab, join, options, no_jbuf_after,
2830 &icp_other_tables_ok))
2831 DBUG_RETURN(true);
2832 if (tab->use_join_cache != JOIN_CACHE::ALG_NONE)
2833 tab[-1].next_select= sub_select_op;
2834
2835 if (table->covering_keys.is_set(tab->ref.key) &&
2836 !table->no_keyread)
2837 table->set_keyread(TRUE);
2838 else
2839 push_index_cond(tab, tab->ref.key, icp_other_tables_ok,
2840 &trace_refine_table);
2841 break;
2842 case JT_ALL:
2843 if (setup_join_buffering(tab, join, options, no_jbuf_after,
2844 &icp_other_tables_ok))
2845 DBUG_RETURN(true);
2846 if (tab->use_join_cache != JOIN_CACHE::ALG_NONE)
2847 tab[-1].next_select=sub_select_op;
2848
2849 /* These init changes read_record */
2850 if (tab->use_quick == QS_DYNAMIC_RANGE)
2851 {
2852 join->thd->set_status_no_good_index_used();
2853 tab->read_first_record= join_init_quick_read_record;
2854 if (statistics)
2855 join->thd->inc_status_select_range_check();
2856 trace_refine_table.add_alnum("access_type", "dynamic_range");
2857 }
2858 else
2859 {
2860 tab->read_first_record= join_init_read_record;
2861 if (i == join->const_tables)
2862 {
2863 if (tab->select && tab->select->quick)
2864 {
2865 if (statistics)
2866 join->thd->inc_status_select_range();
2867 }
2868 else
2869 {
2870 join->thd->set_status_no_index_used();
2871 if (statistics)
2872 {
2873 join->thd->inc_status_select_scan();
2874 join->thd->query_plan_flags|= QPLAN_FULL_SCAN;
2875 }
2876 }
2877 }
2878 else
2879 {
2880 if (tab->select && tab->select->quick)
2881 {
2882 if (statistics)
2883 join->thd->inc_status_select_full_range_join();
2884 }
2885 else
2886 {
2887 join->thd->set_status_no_index_used();
2888 if (statistics)
2889 {
2890 join->thd->inc_status_select_full_join();
2891 join->thd->query_plan_flags|= QPLAN_FULL_JOIN;
2892 }
2893 }
2894 }
2895 if (!table->no_keyread)
2896 {
2897 if (tab->select && tab->select->quick &&
2898 tab->select->quick->index != MAX_KEY && //not index_merge
2899 table->covering_keys.is_set(tab->select->quick->index))
2900 table->set_keyread(TRUE);
2901 else if (!table->covering_keys.is_clear_all() &&
2902 !(tab->select && tab->select->quick))
2903 { // Only read index tree
2904 /*
2905 It has turned out that the below change, while speeding things
2906 up for disk-bound loads, slows them down for cases when the data
2907 is in disk cache (see BUG#35850):
2908 // See bug #26447: "Using the clustered index for a table scan
2909 // is always faster than using a secondary index".
2910 if (table->s->primary_key != MAX_KEY &&
2911 table->file->primary_key_is_clustered())
2912 tab->index= table->s->primary_key;
2913 else
2914 tab->index=find_shortest_key(table, & table->covering_keys);
2915 */
2916 if (!tab->do_loosescan())
2917 tab->index=find_shortest_key(table, & table->covering_keys);
2918 tab->read_first_record= join_read_first;
2919 tab->type=JT_INDEX_SCAN; // Read with index_first / index_next
2920 }
2921 else if (!(tab->select && tab->select->quick))
2922 {
2923 DBUG_ASSERT(table->covering_keys.is_clear_all());
2924 if (!tab->do_loosescan())
2925 {
2926 key_map clustering_keys;
2927 for (uint i= 0; i < table->s->keys; i++)
2928 {
2929 if (tab->keys.is_set(i)
2930 && table->file->index_flags(i, 0, 0) & HA_CLUSTERED_INDEX)
2931 clustering_keys.set_bit(i);
2932 }
2933 uint index= find_shortest_key(table, &clustering_keys);
2934 if (index != MAX_KEY)
2935 {
2936 tab->index= index;
2937 tab->read_first_record= join_read_first;
2938 tab->type= JT_INDEX_SCAN;
2939 }
2940 }
2941 }
2942 }
2943 if (tab->select && tab->select->quick &&
2944 tab->select->quick->index != MAX_KEY && ! tab->table->key_read)
2945 push_index_cond(tab, tab->select->quick->index, icp_other_tables_ok,
2946 &trace_refine_table);
2947 trace_refine_table.add_alnum("access_type",
2948 tab->type == JT_INDEX_SCAN ?
2949 "index_scan" :
2950 (tab->select && tab->select->quick) ?
2951 "range" : "table_scan");
2952 }
2953 break;
2954 case JT_FT:
2955 break;
2956 default:
2957 DBUG_PRINT("error",("Table type %d found",tab->type)); /* purecov: deadcode */
2958 break; /* purecov: deadcode */
2959 case JT_UNKNOWN:
2960 abort(); /* purecov: deadcode */
2961 }
2962 // Materialize derived tables prior to accessing them.
2963 if (tab->table->pos_in_table_list->uses_materialization())
2964 tab->materialize_table= join_materialize_derived;
2965 }
2966
2967 for (uint i= join->const_tables; i < join->primary_tables; i++)
2968 {
2969 if (join->join_tab[i].use_join_cache != JOIN_CACHE::ALG_NONE)
2970 {
2971 /*
2972 A join buffer is used for this table. We here inform the optimizer
2973 that it should not rely on rows of the first non-const table being in
2974 order thanks to an index scan; indeed join buffering of the present
2975 table subsequently changes the order of rows.
2976 */
2977 if (join->order != NULL)
2978 join->simple_order= false;
2979 if (join->group_list != NULL)
2980 join->simple_group= false;
2981 break;
2982 }
2983 }
2984
2985 DBUG_RETURN(FALSE);
2986 }
2987
2988
2989 /**
  Give an error if some tables are resolved with a full join.
2991
2992 This is used by multi_table_update and multi_table_delete when running
2993 in safe mode.
2994
2995 @param join Join condition
2996
2997 @retval
2998 0 ok
2999 @retval
3000 1 Error (full join used)
3001 */
3002
error_if_full_join(JOIN * join)3003 bool error_if_full_join(JOIN *join)
3004 {
3005 for (uint i= 0; i < join->primary_tables; i++)
3006 {
3007 JOIN_TAB *const tab= join->join_tab + i;
3008
3009 if (tab->type == JT_ALL && (!tab->select || !tab->select->quick))
3010 {
3011 /* This error should not be ignored. */
3012 join->select_lex->no_error= FALSE;
3013 my_message(ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE,
3014 ER(ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE), MYF(0));
3015 return true;
3016 }
3017 }
3018 return false;
3019 }
3020
3021
3022 /**
3023 Cleanup table of join operation.
3024
3025 @note
3026 Notice that this is not a complete cleanup. In some situations, the
3027 object may be reused after a cleanup operation, hence we cannot set
3028 the table pointer to NULL in this function.
3029 */
3030
void JOIN_TAB::cleanup()
{
  // The SQL_SELECT and the standalone quick select are owned by this tab.
  delete select;
  select= 0;
  delete quick;
  quick= 0;
  limit= 0;

  // Free select that was created for filesort outside of create_sort_index
  if (filesort && filesort->select && !filesort->own_select)
    delete filesort->select;
  delete filesort;
  filesort= NULL;
  /* Skip non-existing derived tables/views result tables */
  if (table &&
      (table->s->tmp_table != INTERNAL_TMP_TABLE || table->is_created()))
  {
    // Disable index-only reads and close any open index/random scan.
    table->set_keyread(FALSE);
    table->file->ha_index_or_rnd_end();

    free_io_cache(table);
    filesort_free_buffers(table, true);
    /*
      We need to reset this for next select
      (Tested in part_of_refkey)
    */
    table->reginfo.join_tab= NULL;
    if (table->pos_in_table_list)
    {
      // Derived-table keys must be re-generated on the next execution.
      table->pos_in_table_list->derived_keys_ready= false;
      table->pos_in_table_list->derived_key_list.empty();
    }
  }
  end_read_record(&read_record);
}
3066
sjm_query_block_id() const3067 uint JOIN_TAB::sjm_query_block_id() const
3068 {
3069 return sj_is_materialize_strategy(get_sj_strategy()) ?
3070 first_sj_inner_tab->emb_sj_nest->nested_join->query_block_id : 0;
3071 }
3072
3073
3074 /**
3075 Extend join_tab->m_condition and join_tab->select->cond by AND'ing
3076 add_cond to them
3077
3078 @param add_cond The condition to AND with the existing conditions
3079 @param line Code line this method was called from
3080
3081 @retval true if there was a memory allocation error
3082 @retval false otherwise
3083
3084 */
and_with_jt_and_sel_condition(Item * add_cond,uint line)3085 bool JOIN_TAB::and_with_jt_and_sel_condition(Item *add_cond, uint line)
3086 {
3087 if (and_with_condition(add_cond, line))
3088 return true;
3089
3090 if (select)
3091 {
3092 DBUG_PRINT("info",
3093 ("select::cond extended. Change %p -> %p "
3094 "at line %u tab %p select %p",
3095 select->cond, m_condition, line, this, select));
3096 select->cond= m_condition;
3097 }
3098 return false;
3099 }
3100
3101 /**
3102 Extend join_tab->cond by AND'ing add_cond to it
3103
3104 @param add_cond The condition to AND with the existing cond
3105 for this JOIN_TAB
3106 @param line Code line this method was called from
3107
3108 @retval true if there was a memory allocation error
3109 @retval false otherwise
3110 */
bool JOIN_TAB::and_with_condition(Item *add_cond, uint line)
{
  // Old pointer kept only for the debug trace printout below.
  Item *old_cond MY_ATTRIBUTE((unused))= m_condition;
  if (and_conditions(&m_condition, add_cond))
    return true;                              // Memory allocation failure
  DBUG_PRINT("info", ("JOIN_TAB::m_condition extended. Change %p -> %p "
                      "at line %u tab %p",
                      old_cond, m_condition, line, this));
  return false;
}
3121
3122
3123 /**
3124 Check if JOIN_TAB condition was moved to Filesort condition.
3125 If yes then return condition belonging to Filesort, otherwise
3126 return condition belonging to JOIN_TAB.
3127 */
3128
unified_condition() const3129 Item *JOIN_TAB::unified_condition() const
3130 {
3131 return filesort && filesort->select ? filesort->select->cond : condition();
3132 }
3133
3134
3135 /**
3136 Partially cleanup JOIN after it has executed: close index or rnd read
3137 (table cursors), free quick selects.
3138
3139 This function is called in the end of execution of a JOIN, before the used
3140 tables are unlocked and closed.
3141
3142 For a join that is resolved using a temporary table, the first sweep is
3143 performed against actual tables and an intermediate result is inserted
  into the temporary table.
3145 The last sweep is performed against the temporary table. Therefore,
3146 the base tables and associated buffers used to fill the temporary table
3147 are no longer needed, and this function is called to free them.
3148
3149 For a join that is performed without a temporary table, this function
3150 is called after all rows are sent, but before EOF packet is sent.
3151
3152 For a simple SELECT with no subqueries this function performs a full
3153 cleanup of the JOIN and calls mysql_unlock_read_tables to free used base
3154 tables.
3155
3156 If a JOIN is executed for a subquery or if it has a subquery, we can't
3157 do the full cleanup and need to do a partial cleanup only.
3158 - If a JOIN is not the top level join, we must not unlock the tables
3159 because the outer select may not have been evaluated yet, and we
3160 can't unlock only selected tables of a query.
3161 - Additionally, if this JOIN corresponds to a correlated subquery, we
3162 should not free quick selects and join buffers because they will be
3163 needed for the next execution of the correlated subquery.
3164 - However, if this is a JOIN for a [sub]select, which is not
3165 a correlated subquery itself, but has subqueries, we can free it
3166 fully and also free JOINs of all its subqueries. The exception
3167 is a subquery in SELECT list, e.g: @n
3168 SELECT a, (select max(b) from t1) group by c @n
3169 This subquery will not be evaluated at first sweep and its value will
3170 not be inserted into the temporary table. Instead, it's evaluated
3171 when selecting from the temporary table. Therefore, it can't be freed
3172 here even though it's not correlated.
3173
3174 @todo
3175 Unlock tables even if the join isn't top level select in the tree
3176 */
3177
void JOIN::join_free()
{
  SELECT_LEX_UNIT *tmp_unit;
  SELECT_LEX *sl;
  /*
    Optimization: if not EXPLAIN and we are done with the JOIN,
    free all tables.
  */
  bool full= (!select_lex->uncacheable && !thd->lex->describe);
  bool can_unlock= full;
  DBUG_ENTER("JOIN::join_free");

  cleanup(full);

  // Recursively clean up all subqueries of this query block.
  for (tmp_unit= select_lex->first_inner_unit();
       tmp_unit;
       tmp_unit= tmp_unit->next_unit())
    for (sl= tmp_unit->first_select(); sl; sl= sl->next_select())
    {
      Item_subselect *subselect= sl->master_unit()->item;
      // A not-yet-evaluated subquery may run later: only partial cleanup.
      bool full_local= full && (!subselect || subselect->is_evaluated());
      /*
        If this join is evaluated, we can fully clean it up and clean up all
        its underlying joins even if they are correlated -- they will not be
        used any more anyway.
        If this join is not yet evaluated, we still must clean it up to
        close its table cursors -- it may never get evaluated, as in case of
        ... HAVING FALSE OR a IN (SELECT ...))
        but all table cursors must be closed before the unlock.
      */
      sl->cleanup_all_joins(full_local);
      /* Can't unlock if at least one JOIN is still needed */
      can_unlock= can_unlock && full_local;
    }

  /*
    We are not using tables anymore
    Unlock all tables. We may be in an INSERT .... SELECT statement.
  */
  // Only the top-level query block of the statement may release locks.
  if (can_unlock && lock && thd->lock && ! thd->locked_tables_mode &&
      !(select_options & SELECT_NO_UNLOCK) &&
      !select_lex->subquery_in_having &&
      (select_lex == (thd->lex->unit.fake_select_lex ?
                      thd->lex->unit.fake_select_lex : &thd->lex->select_lex)))
  {
    /*
      TODO: unlock tables even if the join isn't top level select in the
      tree.
    */
    mysql_unlock_read_tables(thd, lock);           // Don't free join->lock
    lock= 0;
  }

  DBUG_VOID_RETURN;
}
3233
3234
3235 /**
3236 Free resources of given join.
3237
  @param full  true if all resources should be freed. The call with
               full==true must be the last one; before it, this function
               may be called several times with full==false.
3241
3242 @note
3243 With subquery this function definitely will be called several times,
3244 but even for simple query it can be called several times.
3245 */
3246
void JOIN::cleanup(bool full)
{
  DBUG_ENTER("JOIN::cleanup");

  DBUG_ASSERT(const_tables <= primary_tables &&
              primary_tables <= tables);

  if (join_tab)
  {
    JOIN_TAB *tab,*end;

    if (full)
    {
      // Full cleanup: release every resource owned by each join tab.
      for (tab= join_tab, end= tab + tables; tab < end; tab++)
        tab->cleanup();
    }
    else
    {
      // Partial cleanup: close cursors and free buffers, keep the plan.
      for (tab= join_tab, end= tab + tables; tab < end; tab++)
      {
        if (!tab->table)
          continue;
        if (tab->table->is_created())
        {
          tab->table->file->ha_index_or_rnd_end();
          if (tab->op &&
              tab->op->type() == QEP_operation::OT_TMP_TABLE)
          {
            // Turn off write caching on internal tmp table; report failure.
            int tmp= 0;
            if ((tmp= tab->table->file->extra(HA_EXTRA_NO_CACHE)))
              tab->table->file->print_error(tmp, MYF(0));
          }
        }
        free_io_cache(tab->table);
        filesort_free_buffers(tab->table, full);
      }
    }
  }
  /*
    We are not using tables anymore
    Unlock all tables. We may be in an INSERT .... SELECT statement.
  */
  if (full)
  {
    // Run Cached_item DTORs!
    group_fields.delete_elements();

    /*
      We can't call delete_elements() on copy_funcs as this will cause
      problems in free_elements() as some of the elements are then deleted.
    */
    tmp_table_param.copy_funcs.empty();
    tmp_table_param.cleanup();
  }
  /* Restore ref array to original state */
  if (current_ref_ptrs != items0)
  {
    set_items_ref_array(items0);
    set_group_rpa= false;
  }
  DBUG_VOID_RETURN;
}
3309
3310
3311 /**
  Filter out ORDER BY items that are equal to constants in the WHERE clause
3313
3314 This function is a limited version of remove_const() for use
3315 with non-JOIN statements (i.e. single-table UPDATE and DELETE).
3316
3317
3318 @param order Linked list of ORDER BY arguments
3319 @param cond WHERE expression
3320
3321 @return pointer to new filtered ORDER list or NULL if whole list eliminated
3322
3323 @note
3324 This function overwrites input order list.
3325 */
3326
simple_remove_const(ORDER * order,Item * where)3327 ORDER *simple_remove_const(ORDER *order, Item *where)
3328 {
3329 if (!order || !where)
3330 return order;
3331
3332 ORDER *first= NULL, *prev= NULL;
3333 for (; order; order= order->next)
3334 {
3335 DBUG_ASSERT(!order->item[0]->with_sum_func); // should never happen
3336 if (!const_expression_in_where(where, order->item[0]))
3337 {
3338 if (!first)
3339 first= order;
3340 if (prev)
3341 prev->next= order;
3342 prev= order;
3343 }
3344 }
3345 if (prev)
3346 prev->next= NULL;
3347 return first;
3348 }
3349
3350
3351
3352
3353 /*
3354 Check if equality can be used in removing components of GROUP BY/DISTINCT
3355
3356 SYNOPSIS
3357 test_if_equality_guarantees_uniqueness()
3358 l the left comparison argument (a field if any)
3359 r the right comparison argument (a const of any)
3360
3361 DESCRIPTION
3362 Checks if an equality predicate can be used to take away
3363 DISTINCT/GROUP BY because it is known to be true for exactly one
3364 distinct value (e.g. <expr> == <const>).
3365 Arguments must be of the same type because e.g.
3366 <string_field> = <int_const> may match more than 1 distinct value from
3367 the column.
3368 We must take into consideration and the optimization done for various
3369 string constants when compared to dates etc (see Item_int_with_ref) as
3370 well as the collation of the arguments.
3371
3372 RETURN VALUE
3373 TRUE can be used
3374 FALSE cannot be used
3375 */
3376 static bool
test_if_equality_guarantees_uniqueness(Item * l,Item * r)3377 test_if_equality_guarantees_uniqueness(Item *l, Item *r)
3378 {
3379 return r->const_item() &&
3380 /* elements must be compared as dates */
3381 (Arg_comparator::can_compare_as_dates(l, r, 0) ||
3382 /* or of the same result type */
3383 (r->result_type() == l->result_type() &&
3384 /* and must have the same collation if compared as strings */
3385 (l->result_type() != STRING_RESULT ||
3386 l->collation.collation == r->collation.collation)));
3387 }
3388
3389
3390 /*
3391 Return TRUE if i1 and i2 (if any) are equal items,
3392 or if i1 is a wrapper item around the f2 field.
3393 */
3394
equal(Item * i1,Item * i2,Field * f2)3395 static bool equal(Item *i1, Item *i2, Field *f2)
3396 {
3397 DBUG_ASSERT((i2 == NULL) ^ (f2 == NULL));
3398
3399 if (i2 != NULL)
3400 return i1->eq(i2, 1);
3401 else if (i1->type() == Item::FIELD_ITEM)
3402 return f2->eq(((Item_field *) i1)->field);
3403 else
3404 return FALSE;
3405 }
3406
3407
3408 /**
3409 Test if a field or an item is equal to a constant value in WHERE
3410
3411 @param cond WHERE clause expression
3412 @param comp_item Item to find in WHERE expression
3413 (if comp_field != NULL)
3414 @param comp_field Field to find in WHERE expression
3415 (if comp_item != NULL)
3416 @param[out] const_item intermediate arg, set to Item pointer to NULL
3417
3418 @return TRUE if the field is a constant value in WHERE
3419
3420 @note
3421 comp_item and comp_field parameters are mutually exclusive.
3422 */
bool
const_expression_in_where(Item *cond, Item *comp_item, Field *comp_field,
                          Item **const_item)
{
  DBUG_ASSERT((comp_item == NULL) ^ (comp_field == NULL));

  // If the caller does not track the constant, use a local placeholder
  // so that nested calls can still cross-check the constant's identity.
  Item *intermediate= NULL;
  if (const_item == NULL)
    const_item= &intermediate;

  if (cond->type() == Item::COND_ITEM)
  {
    // AND: a single constant-equality conjunct is sufficient.
    // OR: every disjunct must pin the expression to the same constant.
    bool and_level= (((Item_cond*) cond)->functype()
                     == Item_func::COND_AND_FUNC);
    List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
    Item *item;
    while ((item=li++))
    {
      bool res=const_expression_in_where(item, comp_item, comp_field,
                                         const_item);
      if (res)                                      // Is a const value
      {
        if (and_level)
          return 1;
      }
      else if (!and_level)
        return 0;
    }
    return and_level ? 0 : 1;
  }
  else if (cond->eq_cmp_result() != Item::COND_OK)
  {                                                 // boolean compare function
    Item_func* func= (Item_func*) cond;
    // Only '=' and '<=>' can bind the expression to a single value.
    if (func->functype() != Item_func::EQUAL_FUNC &&
        func->functype() != Item_func::EQ_FUNC)
      return 0;
    Item *left_item= ((Item_func*) cond)->arguments()[0];
    Item *right_item= ((Item_func*) cond)->arguments()[1];
    if (equal(left_item, comp_item, comp_field))
    {
      if (test_if_equality_guarantees_uniqueness (left_item, right_item))
      {
        // All equalities found so far must agree on the same constant.
        if (*const_item)
          return right_item->eq(*const_item, 1);
        *const_item=right_item;
        return 1;
      }
    }
    else if (equal(right_item, comp_item, comp_field))
    {
      if (test_if_equality_guarantees_uniqueness (right_item, left_item))
      {
        if (*const_item)
          return left_item->eq(*const_item, 1);
        *const_item=left_item;
        return 1;
      }
    }
  }
  return 0;
}
3484
3485 /**
3486 Test if this is a prefix index.
3487
3488 @param table table
3489 @param idx index to check
3490
3491 @return TRUE if this is a prefix index
3492 */
is_prefix_index(TABLE * table,uint idx)3493 bool is_prefix_index(TABLE* table, uint idx)
3494 {
3495 if (!table || !table->key_info)
3496 {
3497 return false;
3498 }
3499 KEY* key_info = table->key_info;
3500 uint key_parts = key_info[idx].user_defined_key_parts;
3501 KEY_PART_INFO* key_part = key_info[idx].key_part;
3502
3503 for (uint i = 0; i < key_parts; i++, key_part++)
3504 {
3505 if (key_part->field &&
3506 (key_part->length !=
3507 table->field[key_part->fieldnr - 1]->key_length() &&
3508 !(key_info->flags & (HA_FULLTEXT | HA_SPATIAL))))
3509 {
3510 return true;
3511 }
3512 }
3513 return false;
3514 }
3515
3516 /**
3517 Test if one can use the key to resolve ORDER BY.
3518
3519 @param order Sort order
3520 @param table Table to sort
3521 @param idx Index to check
3522 @param used_key_parts [out] NULL by default, otherwise return value for
3523 used key parts.
3524
3525
3526 @note
3527 used_key_parts is set to correct key parts used if return value != 0
3528 (On other cases, used_key_part may be changed)
3529 Note that the value may actually be greater than the number of index
3530 key parts. This can happen for storage engines that have the primary
3531 key parts as a suffix for every secondary key.
3532
3533 @retval
3534 1 key is ok.
3535 @retval
3536 0 Key can't be used
3537 @retval
3538 -1 Reverse key can be used
3539 */
3540
int test_if_order_by_key(ORDER *order, TABLE *table, uint idx,
                         uint *used_key_parts)
{
  KEY_PART_INFO *key_part,*key_part_end;
  key_part=table->key_info[idx].key_part;
  key_part_end=key_part+table->key_info[idx].user_defined_key_parts;
  key_part_map const_key_parts=table->const_key_parts[idx];
  // 0 = undecided yet, 1 = forward scan works, -1 = backward scan works
  int reverse=0;
  uint key_parts;
  // Set once we continue matching into the primary-key suffix that some
  // engines (clustered PK) append to every secondary index.
  my_bool on_pk_suffix= FALSE;
  DBUG_ENTER("test_if_order_by_key");

  for (; order ; order=order->next, const_key_parts>>=1)
  {
    /*
      Since only fields can be indexed, ORDER BY <something> that is
      not a field cannot be resolved by using an index.
    */
    Item *real_itm= (*order->item)->real_item();
    if (real_itm->type() != Item::FIELD_ITEM)
      DBUG_RETURN(0);

    Field *field= static_cast<Item_field*>(real_itm)->field;
    int flag;

    /*
      Skip key parts that are constants in the WHERE clause.
      These are already skipped in the ORDER BY by const_expression_in_where()
    */
    for (; const_key_parts & 1 ; const_key_parts>>= 1)
      key_part++;

#ifdef WITH_PARTITION_STORAGE_ENGINE
    /* Avoid usage of prefix index for sorting a partition table */
    if (table->part_info && key_part != table->key_info[idx].key_part &&
        key_part != key_part_end && is_prefix_index(table, idx))
      DBUG_RETURN(0);
#endif

    if (key_part == key_part_end)
    {
      /*
        We are at the end of the key. Check if the engine has the primary
        key as a suffix to the secondary keys. If it has continue to check
        the primary key as a suffix.
      */
      if (!on_pk_suffix &&
          (table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX) &&
          table->s->primary_key != MAX_KEY &&
          table->s->primary_key != idx)
      {
        // Switch to walking the primary key's parts.
        on_pk_suffix= TRUE;
        key_part= table->key_info[table->s->primary_key].key_part;
        key_part_end=key_part +
          table->key_info[table->s->primary_key].user_defined_key_parts;
        const_key_parts=table->const_key_parts[table->s->primary_key];

        for (; const_key_parts & 1 ; const_key_parts>>= 1)
          key_part++;
        /*
          The primary and secondary key parts were all const (i.e. there's
          one row). The sorting doesn't matter.
        */
        if (key_part == key_part_end && reverse == 0)
        {
          key_parts= 0;
          reverse= 1;
          goto ok;
        }
      }
      else
        DBUG_RETURN(0);
    }

    // The ORDER BY column must match the next key part, and the key part
    // must be usable for sorting (not a prefix part etc.).
    if (key_part->field != field || !field->part_of_sortkey.is_set(idx))
      DBUG_RETURN(0);

    const ORDER::enum_order keypart_order=
      (key_part->key_part_flag & HA_REVERSE_SORT) ?
      ORDER::ORDER_DESC : ORDER::ORDER_ASC;
    /* set flag to 1 if we can use read-next on key, else to -1 */
    flag= (order->direction == keypart_order) ? 1 : -1;
    if (reverse && flag != reverse)
      DBUG_RETURN(0);            // Mixed ASC/DESC relative to the key order
    reverse=flag;                // Remember if reverse
    key_part++;
  }
  if (on_pk_suffix)
  {
    uint used_key_parts_secondary= table->key_info[idx].user_defined_key_parts;
    uint used_key_parts_pk=
      (uint) (key_part - table->key_info[table->s->primary_key].key_part);
    key_parts= used_key_parts_pk + used_key_parts_secondary;

    // A backward scan needs HA_READ_PREV support on both the secondary
    // index and the primary-key suffix.
    if (reverse == -1 &&
        (!(table->file->index_flags(idx, used_key_parts_secondary - 1, 1) &
           HA_READ_PREV) ||
         !(table->file->index_flags(table->s->primary_key,
                                    used_key_parts_pk - 1, 1) & HA_READ_PREV)))
      reverse= 0;                               // Index can't be used
  }
  else
  {
    key_parts= (uint) (key_part - table->key_info[idx].key_part);
    if (reverse == -1 &&
        !(table->file->index_flags(idx, key_parts-1, 1) & HA_READ_PREV))
      reverse= 0;                               // Index can't be used
  }
ok:
  if (used_key_parts != NULL)
    *used_key_parts= key_parts;
  DBUG_RETURN(reverse);
}
3654
3655
3656 /**
3657 Find shortest key suitable for full table scan.
3658
3659 @param table Table to scan
3660 @param usable_keys Allowed keys
3661
3662 @note
3663 As far as
3664 1) clustered primary key entry data set is a set of all record
3665 fields (key fields and not key fields) and
3666 2) secondary index entry data is a union of its key fields and
3667 primary key fields (at least InnoDB and its derivatives don't
3668 duplicate primary key fields there, even if the primary and
3669 the secondary keys have a common subset of key fields),
3670 then secondary index entry data is always a subset of primary key entry.
3671 Unfortunately, key_info[nr].key_length doesn't show the length
3672 of key/pointer pair but a sum of key field lengths only, thus
3673 we can't estimate index IO volume comparing only this key_length
3674 value of secondary keys and clustered PK.
3675 So, try secondary keys first, and choose PK only if there are no
3676 usable secondary covering keys or found best secondary key include
3677 all table fields (i.e. same as PK):
3678
3679 @return
3680 MAX_KEY no suitable key found
3681 key index otherwise
3682 */
3683
find_shortest_key(TABLE * table,const key_map * usable_keys)3684 uint find_shortest_key(TABLE *table, const key_map *usable_keys)
3685 {
3686 uint best= MAX_KEY;
3687 uint usable_clustered_pk= (table->file->primary_key_is_clustered() &&
3688 table->s->primary_key != MAX_KEY &&
3689 usable_keys->is_set(table->s->primary_key)) ?
3690 table->s->primary_key : MAX_KEY;
3691 if (!usable_keys->is_clear_all())
3692 {
3693 uint min_length= (uint) ~0;
3694 for (uint nr=0; nr < table->s->keys ; nr++)
3695 {
3696 if (nr == usable_clustered_pk)
3697 continue;
3698 if (usable_keys->is_set(nr))
3699 {
3700 if (table->key_info[nr].key_length < min_length)
3701 {
3702 min_length=table->key_info[nr].key_length;
3703 best=nr;
3704 }
3705 }
3706 }
3707 }
3708 if (usable_clustered_pk != MAX_KEY)
3709 {
3710 /*
3711 If the primary key is clustered and found shorter key covers all table
3712 fields and is not clustering then primary key scan normally would be
3713 faster because amount of data to scan is the same but PK is clustered.
3714 It's safe to compare key parts with table fields since duplicate key
3715 parts aren't allowed.
3716 */
3717 if (best == MAX_KEY ||
3718 ((table->key_info[best].user_defined_key_parts >= table->s->fields)
3719 && !(table->file->index_flags(best, 0, 0) & HA_CLUSTERED_INDEX)))
3720 best= usable_clustered_pk;
3721 }
3722 return best;
3723 }
3724
3725 /**
3726 Test if a second key is the subkey of the first one.
3727
3728 @param key_part First key parts
3729 @param ref_key_part Second key parts
3730 @param ref_key_part_end Last+1 part of the second key
3731
3732 @note
3733 Second key MUST be shorter than the first one.
3734
3735 @retval
3736 1 is a subkey
3737 @retval
3738 0 no sub key
3739 */
3740
3741 inline bool
is_subkey(KEY_PART_INFO * key_part,KEY_PART_INFO * ref_key_part,KEY_PART_INFO * ref_key_part_end)3742 is_subkey(KEY_PART_INFO *key_part, KEY_PART_INFO *ref_key_part,
3743 KEY_PART_INFO *ref_key_part_end)
3744 {
3745 for (; ref_key_part < ref_key_part_end; key_part++, ref_key_part++)
3746 if (!key_part->field->eq(ref_key_part->field))
3747 return 0;
3748 return 1;
3749 }
3750
3751 /**
3752 Test if REF_OR_NULL optimization will be used if the specified
3753 ref_key is used for REF-access to 'tab'
3754
3755 @retval
3756 true JT_REF_OR_NULL will be used
3757 @retval
3758 false no JT_REF_OR_NULL access
3759 */
3760 bool
is_ref_or_null_optimized(const JOIN_TAB * tab,uint ref_key)3761 is_ref_or_null_optimized(const JOIN_TAB *tab, uint ref_key)
3762 {
3763 if (tab->keyuse)
3764 {
3765 const Key_use *keyuse= tab->keyuse;
3766 while (keyuse->key != ref_key && keyuse->table == tab->table)
3767 keyuse++;
3768
3769 const table_map const_tables= tab->join->const_table_map;
3770 while (keyuse->key == ref_key && keyuse->table == tab->table)
3771 {
3772 if (!(keyuse->used_tables & ~const_tables))
3773 {
3774 if (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)
3775 return true;
3776 }
3777 keyuse++;
3778 }
3779 }
3780 return false;
3781 }
3782
3783 /**
3784 Test if we can use one of the 'usable_keys' instead of 'ref' key
3785 for sorting.
3786
3787 @param ref Number of key, used for WHERE clause
3788 @param usable_keys Keys for testing
3789
3790 @return
3791 - MAX_KEY If we can't use other key
3792 - the number of found key Otherwise
3793 */
3794
3795 static uint
test_if_subkey(ORDER * order,JOIN_TAB * tab,uint ref,uint ref_key_parts,const key_map * usable_keys)3796 test_if_subkey(ORDER *order, JOIN_TAB *tab, uint ref, uint ref_key_parts,
3797 const key_map *usable_keys)
3798 {
3799 uint nr;
3800 uint min_length= (uint) ~0;
3801 uint best= MAX_KEY;
3802 TABLE *table= tab->table;
3803 KEY_PART_INFO *ref_key_part= table->key_info[ref].key_part;
3804 KEY_PART_INFO *ref_key_part_end= ref_key_part + ref_key_parts;
3805
3806 for (nr= 0 ; nr < table->s->keys ; nr++)
3807 {
3808 if (usable_keys->is_set(nr) &&
3809 table->key_info[nr].key_length < min_length &&
3810 table->key_info[nr].user_defined_key_parts >= ref_key_parts &&
3811 is_subkey(table->key_info[nr].key_part, ref_key_part,
3812 ref_key_part_end) &&
3813 !is_ref_or_null_optimized(tab, nr) &&
3814 test_if_order_by_key(order, table, nr))
3815 {
3816 min_length= table->key_info[nr].key_length;
3817 best= nr;
3818 }
3819 }
3820 return best;
3821 }
3822
3823
3824 /**
3825 It is not obvious to see that test_if_skip_sort_order() never changes the
3826 plan if no_changes is true. So we double-check: creating an instance of this
3827 class saves some important access-path-related information of the current
3828 table; when the instance is destroyed, the latest access-path information is
3829 compared with saved data.
3830 */
class Plan_change_watchdog
{
#ifndef DBUG_OFF
public:
  /**
    Snapshot the access-path state of a table if changes are forbidden;
    the destructor then asserts that nothing has changed.

    @param tab_arg        table whose access path is being determined
    @param no_changes_arg whether a change to the access path is allowed
  */
  Plan_change_watchdog(const JOIN_TAB *tab_arg, const bool no_changes_arg)
  {
    // Only to keep gcc 4.1.2-44 silent about uninitialized variables
    quick= NULL;
    quick_index= 0;
    if (no_changes_arg)
    {
      // Save everything the destructor will verify.
      tab= tab_arg;
      type= tab->type;
      if ((select= tab->select))
        if ((quick= tab->select->quick))
          quick_index= quick->index;
      use_quick= tab->use_quick;
      ref_key= tab->ref.key;
      ref_key_parts= tab->ref.key_parts;
      index= tab->index;
    }
    else
    {
      // tab == NULL marks "changes allowed": destructor does nothing.
      tab= NULL;
      // Only to keep gcc 4.1.2-44 silent about uninitialized variables
      type= JT_UNKNOWN;
      select= NULL;
      ref_key= ref_key_parts= index= 0;
      use_quick= QS_NONE;
    }
  }
  ~Plan_change_watchdog()
  {
    if (tab == NULL)
      return;
    // changes are not allowed, we verify:
    DBUG_ASSERT(tab->type == type);
    DBUG_ASSERT(tab->select == select);
    if (select != NULL)
    {
      DBUG_ASSERT(tab->select->quick == quick);
      if (quick != NULL)
        DBUG_ASSERT(tab->select->quick->index == quick_index);
    }
    DBUG_ASSERT(tab->use_quick == use_quick);
    DBUG_ASSERT(tab->ref.key == ref_key);
    DBUG_ASSERT(tab->ref.key_parts == ref_key_parts);
    DBUG_ASSERT(tab->index == index);
  }
private:
  const JOIN_TAB *tab;            ///< table, or NULL if changes are allowed
  enum join_type type;            ///< copy of tab->type
  // "Range / index merge" info
  const SQL_SELECT *select;       ///< copy of tab->select
  const QUICK_SELECT_I *quick;    ///< copy of tab->select->quick
  uint quick_index;               ///< copy of tab->select->quick->index
  enum quick_type use_quick;      ///< copy of tab->use_quick
  // "ref access" info
  int ref_key;                    ///< copy of tab->ref.key
  uint ref_key_parts;             ///< copy of tab->ref.key_parts
  // Other index-related info
  uint index;                     ///< copy of tab->index
#else  // in non-debug build, empty class
public:
  Plan_change_watchdog(const JOIN_TAB *tab_arg, const bool no_changes_arg) {}
#endif
};
3902
3903 /**
3904 Test if we can skip the ORDER BY by using an index.
3905
3906 SYNOPSIS
3907 test_if_skip_sort_order()
3908 tab
3909 order
3910 select_limit
3911 no_changes
3912 map
3913
3914 If we can use an index, the JOIN_TAB / tab->select struct
3915 is changed to use the index.
3916
3917 The index must cover all fields in <order>, or it will not be considered.
3918
3919 @param tab NULL or JOIN_TAB of the accessed table
3920 @param order Linked list of ORDER BY arguments
3921 @param select_limit LIMIT value, or HA_POS_ERROR if no limit
3922 @param no_changes No changes will be made to the query plan.
3923 @param map key_map of applicable indexes.
3924 @param clause_type "ORDER BY" etc for printing in optimizer trace
3925
3926 @todo
3927 - sergeyp: Results of all index merge selects actually are ordered
3928 by clustered PK values.
3929
3930 @retval
3931 0 We have to use filesort to do the sorting
3932 @retval
3933 1 We can use an index.
3934 */
3935
bool
test_if_skip_sort_order(JOIN_TAB *tab, ORDER *order, ha_rows select_limit,
                        const bool no_changes, const key_map *map,
                        const char *clause_type)
{
  int ref_key;
  uint ref_key_parts;
  int order_direction= 0;
  uint used_key_parts;
  TABLE *table=tab->table;
  SQL_SELECT *select=tab->select;
  // Remember the quick select chosen by the range optimizer so we can
  // restore (or delete) it depending on the decision taken below.
  QUICK_SELECT_I *save_quick= select ? select->quick : NULL;
  int best_key= -1;
  Item *orig_cond;
  bool orig_cond_saved= false, set_up_ref_access_to_key= false;
  bool can_skip_sorting= false;                  // used as return value
  int changed_key= -1;
  DBUG_ENTER("test_if_skip_sort_order");
  LINT_INIT(ref_key_parts);
  LINT_INIT(orig_cond);

  /* Check that we are always called with first non-const table */
  DBUG_ASSERT(tab == tab->join->join_tab + tab->join->const_tables);

  // In debug builds, verifies on destruction that the plan was not
  // modified when no_changes==true.
  Plan_change_watchdog watchdog(tab, no_changes);

  /* Sorting a single row can always be skipped */
  if (tab->type == JT_EQ_REF ||
      tab->type == JT_CONST  ||
      tab->type == JT_SYSTEM)
  {
    DBUG_RETURN(1);
  }

  /* Check that we are always called with first non-const table */
  DBUG_ASSERT(tab == tab->join->join_tab + tab->join->const_tables);

  /*
    Keys disabled by ALTER TABLE ... DISABLE KEYS should have already
    been taken into account.
  */
  key_map usable_keys= *map;

  /*
    An index can only deliver rows in ORDER BY order if every ordering
    expression is a plain column that is part of the index's sort key.
  */
  for (ORDER *tmp_order=order; tmp_order ; tmp_order=tmp_order->next)
  {
    Item *item= (*tmp_order->item)->real_item();
    if (item->type() != Item::FIELD_ITEM)
    {
      usable_keys.clear_all();
      DBUG_RETURN(0);
    }
    usable_keys.intersect(((Item_field*) item)->field->part_of_sortkey);
    if (usable_keys.is_clear_all())
      DBUG_RETURN(0);					// No usable keys
  }

  ref_key= -1;
  /* Test if constant range in WHERE */
  if (tab->ref.key >= 0 && tab->ref.key_parts)
  {
    // ref_or_null and fulltext access cannot provide ordered output.
    if (tab->type == JT_REF_OR_NULL || tab->type == JT_FT)
      DBUG_RETURN(0);
    ref_key=	   tab->ref.key;
    ref_key_parts= tab->ref.key_parts;
  }
  else if (select && select->quick)		// Range found by opt_range
  {
    int quick_type= select->quick->get_type();
    /*
      assume results are not ordered when index merge is used
      TODO: sergeyp: Results of all index merge selects actually are ordered
      by clustered PK values.
    */

    if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE ||
        quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
        quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT)
      DBUG_RETURN(0);
    ref_key=	   select->quick->index;
    ref_key_parts= select->quick->used_key_parts;
  }

  /*
    If part of the select condition has been pushed we use the
    select condition as it was before pushing. The original
    select condition is saved so that it can be restored when
    exiting this function (if we have not changed index).
  */
  if (tab->pre_idx_push_cond)
  {
    orig_cond=
      tab->set_jt_and_sel_condition(tab->pre_idx_push_cond, __LINE__);
    orig_cond_saved= true;
  }

  Opt_trace_context * const trace= &tab->join->thd->opt_trace;
  Opt_trace_object trace_wrapper(trace);
  Opt_trace_object
    trace_skip_sort_order(trace, "reconsidering_access_paths_for_index_ordering");
  trace_skip_sort_order.add_alnum("clause", clause_type);

  if (ref_key >= 0)
  {
    /*
      We come here when there is a {ref or ordered range access} key.
    */
    if (!usable_keys.is_set(ref_key))
    {
      /*
        We come here when ref_key is not among usable_keys, try to find a
        usable prefix key of that key.
      */
      uint new_ref_key;
      /*
        If using index only read, only consider other possible index only
        keys
      */
      if (table->covering_keys.is_set(ref_key))
        usable_keys.intersect(table->covering_keys);

      if ((new_ref_key= test_if_subkey(order, tab, ref_key, ref_key_parts,
                                       &usable_keys)) < MAX_KEY)
      {
        /* Found key that can be used to retrieve data in sorted order */
        if (tab->ref.key >= 0)
        {
          /*
            We'll use ref access method on key new_ref_key. The actual change
            is done further down in this function where we update the plan.
          */
          set_up_ref_access_to_key= true;
        }
        else if (!no_changes)
        {
          /*
            The range optimizer constructed QUICK_RANGE for ref_key, and
            we want to use instead new_ref_key as the index. We can't
            just change the index of the quick select, because this may
            result in an inconsistent QUICK_SELECT object. Below we
            create a new QUICK_SELECT from scratch so that all its
            parameters are set correctly by the range optimizer.

            Note that the range optimizer is NOT called if
            no_changes==true. This reason is that the range optimizer
            cannot find a QUICK that can return ordered result unless
            index access (ref or index scan) is also able to do so
            (which test_if_order_by_key () will tell).
            Admittedly, range access may be much more efficient than
            e.g. index scan, but the only thing that matters when
            no_change==true is the answer to the question: "Is it
            possible to avoid sorting if an index is used to access
            this table?". The answer does not depend on the outcome of
            the range optimizer.
          */
          key_map new_ref_key_map;  // Force the creation of quick select
          new_ref_key_map.set_bit(new_ref_key); // only for new_ref_key.

          Opt_trace_object
            trace_recest(trace, "rows_estimation");
          trace_recest.add_utf8_table(tab->table).
          add_utf8("index", table->key_info[new_ref_key].name);
          select->quick= 0;
          if (select->test_quick_select(tab->join->thd,
                                        new_ref_key_map,
                                        0,       // empty table_map
                                        (tab->join->select_options &
                                         OPTION_FOUND_ROWS) ?
                                        HA_POS_ERROR :
                                        tab->join->unit->select_limit_cnt,
                                        false,   // don't force quick range
                                        order->direction) <= 0)
          {
            can_skip_sorting= false;
            goto fix_ICP;
          }
	}
        ref_key= new_ref_key;
        changed_key= new_ref_key;
      }
    }
    /* Check if we get the rows in requested sorted order by using the key */
    if (usable_keys.is_set(ref_key) &&
        (order_direction= test_if_order_by_key(order,table,ref_key,
                                               &used_key_parts)))
      goto check_reverse_order;
  }
  {
    /*
      There was no {ref or ordered range access} key, or it was not
      satisfying, neither was any prefix of it. Do a cost-based search on all
      keys:
    */
    uint best_key_parts= 0;
    uint saved_best_key_parts= 0;
    int best_key_direction= 0;
    JOIN *join= tab->join;
    ha_rows table_records= table->file->stats.records;

    test_if_cheaper_ordering(tab, order, table, usable_keys,
                             ref_key, select_limit,
                             &best_key, &best_key_direction,
                             &select_limit, &best_key_parts,
                             &saved_best_key_parts);

    if (best_key < 0)
    {
      // No usable key has been found
      can_skip_sorting= false;
      goto fix_ICP;
    }

    /*
      Does the query have a "FORCE INDEX [FOR GROUP BY] (idx)" (if
      clause is group by) or a "FORCE INDEX [FOR ORDER BY] (idx)" (if
      clause is order by)?
    */
    const bool is_group_by= join && join->group && order == join->group_list;
    const bool is_force_index= table->force_index ||
      (is_group_by ? table->force_index_group : table->force_index_order);

    /*
      filesort() and join cache are usually faster than reading in
      index order and not using join cache. Don't use index scan
      unless:
       - the user specified FORCE INDEX [FOR {GROUP|ORDER} BY] (have to assume
         the user knows what's best)
       - the chosen index is clustered primary key (table scan is not cheaper)
    */
    if (!is_force_index &&
        (select_limit >= table_records) &&
        (tab->type == JT_ALL &&
         tab->join->primary_tables > tab->join->const_tables + 1) &&
         ((unsigned) best_key != table->s->primary_key ||
          !table->file->primary_key_is_clustered()) &&
        !(best_key >= 0
          && (table->file->index_flags(best_key, 0, 0) & HA_CLUSTERED_INDEX)))
    {
      can_skip_sorting= false;
      goto fix_ICP;
    }

    if (select &&
        table->quick_keys.is_set(best_key) &&
        !tab->quick_order_tested.is_set(best_key) &&
        best_key != ref_key)
    {
      tab->quick_order_tested.set_bit(best_key);
      Opt_trace_object
        trace_recest(trace, "rows_estimation");
      trace_recest.add_utf8_table(tab->table).
        add_utf8("index", table->key_info[best_key].name);

      key_map map;           // Force the creation of quick select
      map.set_bit(best_key); // only best_key.
      select->quick= 0;
      select->test_quick_select(join->thd,
                                map,
                                0,        // empty table_map
                                join->select_options & OPTION_FOUND_ROWS ?
                                HA_POS_ERROR :
                                join->unit->select_limit_cnt,
                                true,     // force quick range
                                order->direction);
    }
    order_direction= best_key_direction;
    /*
      saved_best_key_parts is actual number of used keyparts found by the
      test_if_order_by_key function. It could differ from keyinfo->key_parts,
      thus we have to restore it in case of desc order as it affects
      QUICK_SELECT_DESC behaviour.
    */
    used_key_parts= (order_direction == -1) ?
      saved_best_key_parts :  best_key_parts;
    changed_key= best_key;
    // We will use index scan or range scan:
    set_up_ref_access_to_key= false;
  }

check_reverse_order:
  DBUG_ASSERT(order_direction != 0);

  if (order_direction == -1)		// If ORDER BY ... DESC
  {
    if (select && select->quick)
    {
      /*
        Don't reverse the sort order, if it's already done.
        (In some cases test_if_order_by_key() can be called multiple times
      */
      if (select->quick->reverse_sorted())
      {
        can_skip_sorting= true;
        goto fix_ICP;
      }

      if (select->quick->reverse_sort_possible())
        can_skip_sorting= true;
      else
      {
        can_skip_sorting= false;
        goto fix_ICP;
      }

      /*
        test_quick_select() should not create a quick that cannot do
        reverse ordering
      */
      DBUG_ASSERT((select->quick == save_quick) || can_skip_sorting);
    }
    else
    {
      // Other index access (ref or scan) poses no problem
      can_skip_sorting= true;
    }
  }
  else
  {
    // ORDER BY ASC poses no problem
    can_skip_sorting= true;
  }

  DBUG_ASSERT(can_skip_sorting);

  /*
    Update query plan with access pattern for doing 
    ordered access according to what we have decided
    above.
  */
  if (!no_changes) // We are allowed to update QEP
  {
    if (set_up_ref_access_to_key)
    {
      /*
        We'll use ref access method on key changed_key. In general case 
        the index search tuple for changed_ref_key will be different (e.g.
        when one index is defined as (part1, part2, ...) and another as
        (part1, part2(N), ...) and the WHERE clause contains 
        "part1 = const1 AND part2=const2". 
        So we build tab->ref from scratch here.
      */
      Key_use *keyuse= tab->keyuse;
      while (keyuse->key != (uint)changed_key && keyuse->table == tab->table)
        keyuse++;

      if (create_ref_for_key(tab->join, tab, keyuse, tab->prefix_tables()))
      {
        can_skip_sorting= false;
        goto fix_ICP;
      }

      DBUG_ASSERT(tab->type != JT_REF_OR_NULL && tab->type != JT_FT);
    }
    else if (best_key >= 0)
    {
      // true if the range optimizer constructed a fresh quick select above
      bool quick_created= 
        (select && select->quick && select->quick!=save_quick);

      /*
        If ref_key used index tree reading only ('Using index' in EXPLAIN),
        and best_key doesn't, then revert the decision.
      */
      if(!table->covering_keys.is_set(best_key))
        table->set_keyread(false);
      if (!quick_created)
      {
        if (select)                  // Throw any existing quick select
          select->quick= 0;          // Cleanup either reset to save_quick,
                                     // or 'delete save_quick'
        tab->index= best_key;
        tab->read_first_record= order_direction > 0 ?
                                join_read_first:join_read_last;
        tab->type=JT_INDEX_SCAN;       // Read with index_first(), index_next()

        table->file->ha_index_or_rnd_end();
        if (tab->join->select_options & SELECT_DESCRIBE)
        {
          /*
            @todo this neutralizes add_ref_to_table_cond(); as a result
            EXPLAIN shows no "using where" though real SELECT has one.
          */
          tab->ref.key= -1;
          tab->ref.key_parts= 0;
          if (select_limit < table->file->stats.records)
            tab->limit= select_limit;
        }
      }
      else if (tab->type != JT_ALL)
      {
        /*
          We're about to use a quick access to the table.
          We need to change the access method so as the quick access
          method is actually used.
        */
        DBUG_ASSERT(tab->select->quick);
        DBUG_ASSERT(tab->select->quick->index==(uint)best_key);
        tab->type=JT_ALL;
        tab->use_quick=QS_RANGE;
        tab->ref.key= -1;
        tab->ref.key_parts=0;		// Don't use ref key.
        tab->read_first_record= join_init_read_record;
        if (tab->is_using_loose_index_scan())
          tab->join->tmp_table_param.precomputed_group_by= TRUE;
        /*
          TODO: update the number of records in tab->position
        */
      }
    } // best_key >= 0

    if (order_direction == -1)		// If ORDER BY ... DESC
    {
      if (select && select->quick)
      {
        /* ORDER BY range_key DESC */
        QUICK_SELECT_I *tmp= select->quick->make_reverse(used_key_parts);
        if (!tmp)
        {
          tab->limit= 0;
          can_skip_sorting= false;      // Reverse sort failed -> filesort
          goto fix_ICP;
        }
        if (select->quick == save_quick)
          save_quick= 0;                // Because set_quick(tmp) frees it
        select->set_quick(tmp);
      }
      else if (tab->type != JT_INDEX_SCAN && tab->type != JT_REF_OR_NULL &&
               tab->ref.key >= 0 && tab->ref.key_parts <= used_key_parts)
      {
        /*
          SELECT * FROM t1 WHERE a=1 ORDER BY a DESC,b DESC

          Use a traversal function that starts by reading the last row
          with key part (A) and then traverse the index backwards.
        */
        tab->read_first_record= join_read_last_key;
        tab->read_record.read_record= join_read_prev_same;
        tab->read_record.unlock_row= rr_unlock_row;

        /*
          The current implementation of join_read_prev_same() does not
          work well in combination with ICP and can lead to increased
          execution time. Setting changed_key to the current key
          (based on that we change the access order for the key) will
          ensure that a pushed index condition will be cancelled.
        */
        changed_key= tab->ref.key;
      }
    }
    else if (select && select->quick)
      select->quick->need_sorted_output();
  } // QEP has been modified

fix_ICP:
  /*
    Cleanup:
    We may have both a 'select->quick' and 'save_quick' (original)
    at this point. Delete the one that we won't use.
  */
  if (can_skip_sorting && !no_changes)
  {
    // Keep current (ordered) select->quick
    if (select && save_quick != select->quick)
      delete save_quick;
  }
  else
  {
    // Restore original save_quick
    if (select && select->quick != save_quick)
      select->set_quick(save_quick);
  }

  Opt_trace_object
    trace_change_index(trace, "index_order_summary");
  trace_change_index.add_utf8_table(tab->table)
    .add("index_provides_order", can_skip_sorting)
    .add_alnum("order_direction", order_direction == 1 ? "asc" :
               ((order_direction == -1) ? "desc" :
                "undefined"));

  if (changed_key >= 0)
  {
    bool cancelled_ICP= false;
    // switching to another index, makes pushed index condition obsolete
    if (!no_changes && table->file->pushed_idx_cond)
    {
      table->file->cancel_pushed_idx_cond();
      // and thus tab's m_condition must be how it was before ICP
      orig_cond_saved= false;
      cancelled_ICP= true;
    }
    if (unlikely(trace->is_started()))
    {
      if (cancelled_ICP)
        trace_change_index.add("disabled_pushed_condition_on_old_index", true);
      trace_change_index.add_utf8("index", table->key_info[changed_key].name);
      trace_change_index.add("plan_changed", !no_changes);
      if (!no_changes)
      {
        const char *new_type= tab->type == JT_INDEX_SCAN ? "index_scan" :
          (tab->select && tab->select->quick) ?
          "range" : join_type_str[tab->type];
        trace_change_index.add_alnum("access_type", new_type);
      }
    }
  }
  else if (unlikely(trace->is_started()))
  {
    trace_change_index.add_utf8("index",
                                ref_key >= 0 ?
                                table->key_info[ref_key].name : "unknown");
    trace_change_index.add("plan_changed", false);
  }
  if (orig_cond_saved)
  {
    // ICP set up prior to the call, is still valid:
    tab->set_jt_and_sel_condition(orig_cond, __LINE__);
  }
  DBUG_RETURN(can_skip_sorting);
}
4454
4455
4456
4457 /**
4458 Update join with count of the different type of fields.
4459 */
4460
4461 void
count_field_types(SELECT_LEX * select_lex,TMP_TABLE_PARAM * param,List<Item> & fields,bool reset_with_sum_func)4462 count_field_types(SELECT_LEX *select_lex, TMP_TABLE_PARAM *param,
4463 List<Item> &fields, bool reset_with_sum_func)
4464 {
4465 List_iterator<Item> li(fields);
4466 Item *field;
4467
4468 param->field_count= 0;
4469 param->sum_func_count= 0;
4470 param->func_count= 0;
4471 param->hidden_field_count= 0;
4472 param->outer_sum_func_count= 0;
4473 param->quick_group=1;
4474 while ((field=li++))
4475 {
4476 Item::Type real_type= field->real_item()->type();
4477 if (real_type == Item::FIELD_ITEM)
4478 param->field_count++;
4479 else if (real_type == Item::SUM_FUNC_ITEM)
4480 {
4481 if (! field->const_item())
4482 {
4483 Item_sum *sum_item=(Item_sum*) field->real_item();
4484 if (!sum_item->depended_from() ||
4485 sum_item->depended_from() == select_lex)
4486 {
4487 if (!sum_item->quick_group)
4488 param->quick_group=0; // UDF SUM function
4489 param->sum_func_count++;
4490
4491 for (uint i=0 ; i < sum_item->get_arg_count() ; i++)
4492 {
4493 if (sum_item->get_arg(i)->real_item()->type() == Item::FIELD_ITEM)
4494 param->field_count++;
4495 else
4496 param->func_count++;
4497 }
4498 }
4499 param->func_count++;
4500 }
4501 }
4502 else
4503 {
4504 param->func_count++;
4505 if (reset_with_sum_func)
4506 field->with_sum_func=0;
4507 if (field->with_sum_func)
4508 param->outer_sum_func_count++;
4509 }
4510 }
4511 }
4512
4513
4514 /**
4515 Return 1 if second is a subpart of first argument.
4516
4517 If first parts has different direction, change it to second part
4518 (group is sorted like order)
4519 */
4520
4521 bool
test_if_subpart(ORDER * a,ORDER * b)4522 test_if_subpart(ORDER *a,ORDER *b)
4523 {
4524 for (; a && b; a=a->next,b=b->next)
4525 {
4526 if ((*a->item)->eq(*b->item,1))
4527 a->direction= b->direction;
4528 else
4529 return 0;
4530 }
4531 return MY_TEST(!b);
4532 }
4533
4534 /**
4535 calc how big buffer we need for comparing group entries.
4536 */
4537
4538 void
calc_group_buffer(JOIN * join,ORDER * group)4539 calc_group_buffer(JOIN *join,ORDER *group)
4540 {
4541 uint key_length=0, parts=0, null_parts=0;
4542
4543 if (group)
4544 join->group= 1;
4545 for (; group ; group=group->next)
4546 {
4547 Item *group_item= *group->item;
4548 Field *field= group_item->get_tmp_table_field();
4549 if (field)
4550 {
4551 enum_field_types type;
4552 if ((type= field->type()) == MYSQL_TYPE_BLOB)
4553 key_length+=MAX_BLOB_WIDTH; // Can't be used as a key
4554 else if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_VAR_STRING)
4555 key_length+= field->field_length + HA_KEY_BLOB_LENGTH;
4556 else if (type == MYSQL_TYPE_BIT)
4557 {
4558 /* Bit is usually stored as a longlong key for group fields */
4559 key_length+= 8; // Big enough
4560 }
4561 else
4562 key_length+= field->pack_length();
4563 }
4564 else
4565 {
4566 switch (group_item->result_type()) {
4567 case REAL_RESULT:
4568 key_length+= sizeof(double);
4569 break;
4570 case INT_RESULT:
4571 key_length+= sizeof(longlong);
4572 break;
4573 case DECIMAL_RESULT:
4574 key_length+= my_decimal_get_binary_size(group_item->max_length -
4575 (group_item->decimals ? 1 : 0),
4576 group_item->decimals);
4577 break;
4578 case STRING_RESULT:
4579 {
4580 /*
4581 As items represented as DATE/TIME fields in the group buffer
4582 have STRING_RESULT result type, we increase the length
4583 by 8 as maximum pack length of such fields.
4584 */
4585 if (group_item->is_temporal())
4586 {
4587 key_length+= 8;
4588 }
4589 else if (group_item->field_type() == MYSQL_TYPE_BLOB)
4590 key_length+= MAX_BLOB_WIDTH; // Can't be used as a key
4591 else
4592 {
4593 /*
4594 Group strings are taken as varstrings and require an length field.
4595 A field is not yet created by create_tmp_field()
4596 and the sizes should match up.
4597 */
4598 key_length+= group_item->max_length + HA_KEY_BLOB_LENGTH;
4599 }
4600 break;
4601 }
4602 default:
4603 /* This case should never be choosen */
4604 DBUG_ASSERT(0);
4605 my_error(ER_OUT_OF_RESOURCES, MYF(ME_FATALERROR));
4606 }
4607 }
4608 parts++;
4609 if (group_item->maybe_null)
4610 null_parts++;
4611 }
4612 join->tmp_table_param.group_length=key_length+null_parts;
4613 join->tmp_table_param.group_parts=parts;
4614 join->tmp_table_param.group_null_parts=null_parts;
4615 }
4616
4617
4618
4619 /**
4620 Make an array of pointers to sum_functions to speed up
4621 sum_func calculation.
4622
4623 @retval
4624 0 ok
4625 @retval
4626 1 Error
4627 */
4628
alloc_func_list()4629 bool JOIN::alloc_func_list()
4630 {
4631 uint func_count, group_parts;
4632 DBUG_ENTER("alloc_func_list");
4633
4634 func_count= tmp_table_param.sum_func_count;
4635 /*
4636 If we are using rollup, we need a copy of the summary functions for
4637 each level
4638 */
4639 if (rollup.state != ROLLUP::STATE_NONE)
4640 func_count*= (send_group_parts+1);
4641
4642 group_parts= send_group_parts;
4643 /*
4644 If distinct, reserve memory for possible
4645 disctinct->group_by optimization
4646 */
4647 if (select_distinct)
4648 {
4649 group_parts+= fields_list.elements;
4650 /*
4651 If the ORDER clause is specified then it's possible that
4652 it also will be optimized, so reserve space for it too
4653 */
4654 if (order)
4655 {
4656 ORDER *ord;
4657 for (ord= order; ord; ord= ord->next)
4658 group_parts++;
4659 }
4660 }
4661
4662 /* This must use calloc() as rollup_make_fields depends on this */
4663 sum_funcs= (Item_sum**) thd->calloc(sizeof(Item_sum**) * (func_count+1) +
4664 sizeof(Item_sum***) * (group_parts+1));
4665 sum_funcs_end= (Item_sum***) (sum_funcs + func_count + 1);
4666 DBUG_RETURN(sum_funcs == 0);
4667 }
4668
4669
4670 /**
4671 Initialize 'sum_funcs' array with all Item_sum objects.
4672
4673 @param field_list All items
4674 @param send_result_set_metadata Items in select list
4675 @param before_group_by Set to 1 if this is called before GROUP BY handling
4676 @param recompute Set to TRUE if sum_funcs must be recomputed
4677
4678 @retval
4679 0 ok
4680 @retval
4681 1 error
4682 */
4683
bool JOIN::make_sum_func_list(List<Item> &field_list,
                              List<Item> &send_result_set_metadata,
                              bool before_group_by, bool recompute)
{
  List_iterator_fast<Item> it(field_list);
  Item_sum **func;
  Item *item;
  DBUG_ENTER("make_sum_func_list");

  if (*sum_funcs && !recompute)
    DBUG_RETURN(FALSE); /* We have already initialized sum_funcs. */

  func= sum_funcs;
  while ((item=it++))
  {
    // Collect only non-constant aggregates that belong to this query block
    // (not ones aggregated in an outer query).
    if (item->type() == Item::SUM_FUNC_ITEM && !item->const_item() &&
        (!((Item_sum*) item)->depended_from() ||
         ((Item_sum *)item)->depended_from() == select_lex))
      *func++= (Item_sum*) item;
  }
  if (before_group_by && rollup.state == ROLLUP::STATE_INITED)
  {
    rollup.state= ROLLUP::STATE_READY;
    if (rollup_make_fields(field_list, send_result_set_metadata, &func))
      DBUG_RETURN(TRUE); // Should never happen
  }
  else if (rollup.state == ROLLUP::STATE_NONE)
  {
    // No rollup: every per-level end pointer is the same.
    for (uint i=0 ; i <= send_group_parts ;i++)
      sum_funcs_end[i]= func;
  }
  else if (rollup.state == ROLLUP::STATE_READY)
    DBUG_RETURN(FALSE);                         // Don't put end marker
  *func=0;					// End marker
  DBUG_RETURN(FALSE);
}
4720
4721
4722 /**
4723 Free joins of subselect of this select.
4724
4725 @param thd THD pointer
4726 @param select pointer to st_select_lex which subselects joins we will free
4727 */
4728
free_underlaid_joins(THD * thd,SELECT_LEX * select)4729 void free_underlaid_joins(THD *thd, SELECT_LEX *select)
4730 {
4731 for (SELECT_LEX_UNIT *unit= select->first_inner_unit();
4732 unit;
4733 unit= unit->next_unit())
4734 unit->cleanup();
4735 }
4736
4737 /****************************************************************************
4738 ROLLUP handling
4739 ****************************************************************************/
4740
4741 /**
4742 Wrap all constant Items in GROUP BY list.
4743
4744 For ROLLUP queries each constant item referenced in GROUP BY list
4745 is wrapped up into an Item_func object yielding the same value
4746 as the constant item. The objects of the wrapper class are never
4747 considered as constant items and besides they inherit all
4748 properties of the Item_result_field class.
4749 This wrapping allows us to ensure writing constant items
4750 into temporary tables whenever the result of the ROLLUP
4751 operation has to be written into a temporary table, e.g. when
4752 ROLLUP is used together with DISTINCT in the SELECT list.
4753 Usually when creating temporary tables for a intermidiate
4754 result we do not include fields for constant expressions.
4755
4756 @retval
4757 0 if ok
4758 @retval
4759 1 on error
4760 */
4761
rollup_process_const_fields()4762 bool JOIN::rollup_process_const_fields()
4763 {
4764 ORDER *group_tmp;
4765 Item *item;
4766 List_iterator<Item> it(all_fields);
4767
4768 for (group_tmp= group_list; group_tmp; group_tmp= group_tmp->next)
4769 {
4770 if (!(*group_tmp->item)->const_item())
4771 continue;
4772 while ((item= it++))
4773 {
4774 if (*group_tmp->item == item)
4775 {
4776 Item* new_item= new Item_func_rollup_const(item);
4777 if (!new_item)
4778 return 1;
4779 new_item->fix_fields(thd, (Item **) 0);
4780 thd->change_item_tree(it.ref(), new_item);
4781 for (ORDER *tmp= group_tmp; tmp; tmp= tmp->next)
4782 {
4783 if (*tmp->item == item)
4784 thd->change_item_tree(tmp->item, new_item);
4785 }
4786 break;
4787 }
4788 }
4789 it.rewind();
4790 }
4791 return 0;
4792 }
4793
4794
4795 /**
4796 Fill up rollup structures with pointers to fields to use.
4797
4798 Creates copies of item_sum items for each sum level.
4799
4800 @param fields_arg List of all fields (hidden and real ones)
4801 @param sel_fields Pointer to selected fields
4802 @param func Store here a pointer to all fields
4803
4804 @retval
4805 0 if ok;
4806 In this case func is pointing to next not used element.
4807 @retval
4808 1 on error
4809 */
4810
bool JOIN::rollup_make_fields(List<Item> &fields_arg, List<Item> &sel_fields,
			      Item_sum ***func)
{
  List_iterator_fast<Item> it(fields_arg);
  Item *first_field= sel_fields.head();
  uint level;

  /*
    Create field lists for the different levels

    The idea here is to have a separate field list for each rollup level to
    avoid all runtime checks of which columns should be NULL.

    The list is stored in reverse order to get sum function in such an order
    in func that it makes it easy to reset them with init_sum_functions()

    Assuming:  SELECT a, b, c SUM(b) FROM t1 GROUP BY a,b WITH ROLLUP

    rollup.fields[0] will contain list where a,b,c is NULL
    rollup.fields[1] will contain list where b,c is NULL
    ...
    rollup.ref_pointer_array[#] points to fields for rollup.fields[#]
    ...
    sum_funcs_end[0] points to all sum functions
    sum_funcs_end[1] points to all sum functions, except grand totals
    ...
  */

  for (level=0 ; level < send_group_parts ; level++)
  {
    uint i;
    uint pos= send_group_parts - level -1;
    bool real_fields= 0;
    Item *item;
    List_iterator<Item> new_it(rollup.fields[pos]);
    Ref_ptr_array ref_array_start= rollup.ref_pointer_arrays[pos];
    ORDER *start_group;

    /* Point to first hidden field */
    uint ref_array_ix= fields_arg.elements-1;

    /* Remember where the sum functions ends for the previous level */
    sum_funcs_end[pos+1]= *func;

    /* Find the start of the group for this level */
    for (i= 0, start_group= group_list ;
	 i++ < pos ;
	 start_group= start_group->next)
      ;

    it.rewind();
    while ((item= it++))
    {
      if (item == first_field)
      {
        // We have passed all hidden fields; switch to filling the
        // ref array from the front.
	real_fields= 1;				// End of hidden fields
	ref_array_ix= 0;
      }

      if (item->type() == Item::SUM_FUNC_ITEM && !item->const_item() &&
          (!((Item_sum*) item)->depended_from() ||
           ((Item_sum *)item)->depended_from() == select_lex))
          
      {
	/*
	  This is a top level summary function that must be replaced with
	  a sum function that is reset for this level.

	  NOTE: This code creates an object which is not that nice in a
	  sub select.  Fortunately it's not common to have rollup in
	  sub selects.
	*/
	item= item->copy_or_same(thd);
	((Item_sum*) item)->make_unique();
	*(*func)= (Item_sum*) item;
	(*func)++;
      }
      else 
      {
	/* Check if this is something that is part of this group by */
	ORDER *group_tmp;
	for (group_tmp= start_group, i= pos ;
             group_tmp ; group_tmp= group_tmp->next, i++)
	{
          if (*group_tmp->item == item)
	  {
	    /*
	      This is an element that is used by the GROUP BY and should be
	      set to NULL in this level
	    */
            Item_null_result *null_item=
              new (thd->mem_root) Item_null_result(item->field_type(),
                                                   item->result_type());
            if (!null_item)
              return 1;
	    item->maybe_null= 1;		// Value will be null sometimes
            null_item->result_field= item->get_tmp_table_field();
            item= null_item;
	    break;
	  }
	}
      }
      ref_array_start[ref_array_ix]= item;
      if (real_fields)
      {
	(void) new_it++;			// Point to next item
	new_it.replace(item);			// Replace previous
	ref_array_ix++;
      }
      else
	ref_array_ix--;
    }
  }
  sum_funcs_end[0]= *func;			// Point to last function
  return 0;
}
4927
4928
4929 /**
4930 clear results if there are not rows found for group
4931 (end_send_group/end_write_group)
4932 */
4933
clear()4934 void JOIN::clear()
4935 {
4936 /*
4937 must clear only the non-const tables, as const tables
4938 are not re-calculated.
4939 */
4940 for (uint tableno= const_tables; tableno < primary_tables; tableno++)
4941 mark_as_null_row(join_tab[tableno].table); // All fields are NULL
4942
4943 copy_fields(&tmp_table_param);
4944
4945 if (sum_funcs)
4946 {
4947 Item_sum *func, **func_ptr= sum_funcs;
4948 while ((func= *(func_ptr++)))
4949 func->clear();
4950 }
4951 }
4952
4953
4954 /**
4955 change select_result object of JOIN.
4956
4957 @param res new select_result object
4958
4959 @retval
4960 FALSE OK
4961 @retval
4962 TRUE error
4963 */
4964
change_result(select_result * res)4965 bool JOIN::change_result(select_result *res)
4966 {
4967 DBUG_ENTER("JOIN::change_result");
4968 result= res;
4969 if (result->prepare(fields_list, select_lex->master_unit()) ||
4970 result->prepare2())
4971 {
4972 DBUG_RETURN(TRUE);
4973 }
4974 DBUG_RETURN(FALSE);
4975 }
4976
4977 /**
4978 Add having condition as a where clause condition of the given temp table.
4979
4980 @param curr_tmp_table Table number to which having conds are added.
4981
4982 @returns false if success, true if error.
4983 */
4984
add_having_as_tmp_table_cond(uint curr_tmp_table)4985 bool JOIN::add_having_as_tmp_table_cond(uint curr_tmp_table)
4986 {
4987 having->update_used_tables();
4988 JOIN_TAB *curr_table= &join_tab[curr_tmp_table];
4989 table_map used_tables= curr_table->table->map | OUTER_REF_TABLE_BIT;
4990
4991 /* If tmp table is not used then consider conditions of const table also */
4992 if (!need_tmp)
4993 used_tables|= const_table_map;
4994
4995 DBUG_ENTER("JOIN::add_having_as_table_conds");
4996
4997 Item* sort_table_cond= make_cond_for_table(having, used_tables,
4998 (table_map) 0, false);
4999 if (sort_table_cond)
5000 {
5001 if (!curr_table->select)
5002 if (!(curr_table->select= new SQL_SELECT))
5003 DBUG_RETURN(true);
5004 if (!curr_table->select->cond)
5005 curr_table->select->cond= sort_table_cond;
5006 else
5007 {
5008 if (!(curr_table->select->cond=
5009 new Item_cond_and(curr_table->select->cond,
5010 sort_table_cond)))
5011 DBUG_RETURN(true);
5012 curr_table->select->cond->fix_fields(thd, 0);
5013 }
5014 curr_table->set_condition(curr_table->select->cond, __LINE__);
5015 curr_table->condition()->top_level_item();
5016 DBUG_EXECUTE("where",print_where(curr_table->select->cond,
5017 "select and having",
5018 QT_ORDINARY););
5019
5020 having= make_cond_for_table(having, ~ (table_map) 0,
5021 ~used_tables, false);
5022 DBUG_EXECUTE("where",
5023 print_where(having, "having after sort", QT_ORDINARY););
5024 }
5025
5026 DBUG_RETURN(false);
5027 }
5028
5029 /**
5030 Init tmp tables usage info.
5031
5032 @details
5033 This function finalizes execution plan by taking following actions:
5034 .) tmp tables are created, but not instantiated (this is done during
5035 execution). JOIN_TABs dedicated to tmp tables are filled appropriately.
5036 see JOIN::create_intermediate_table.
5037 .) prepare fields lists (fields, all_fields, ref_pointer_array slices) for
5038 each required stage of execution. These fields lists are set for
5039 tmp tables' tabs and for the tab of last table in the join.
5040 .) fill info for sorting/grouping/dups removal is prepared and saved to
5041 appropriate tabs. Here is an example:
5042 SELECT * from t1,t2 WHERE ... GROUP BY t1.f1 ORDER BY t2.f2, t1.f2
5043 and lets assume that the table order in the plan is t1,t2.
5044 In this case optimizer will sort for group only the first table as the
5045 second one isn't mentioned in GROUP BY. The result will be materialized
5046 in tmp table. As filesort can't sort join optimizer will sort tmp table
5047 also. The first sorting (for group) is called simple as is doesn't
5048 require tmp table. The Filesort object for it is created here - in
5049 JOIN::create_intermediate_table. Filesort for the second case is
5050 created here, in JOIN::make_tmp_tables_info.
5051 @returns
5052 false - Ok
5053 true - Error
5054 */
5055
bool JOIN::make_tmp_tables_info()
{
  List<Item> *curr_all_fields= &all_fields;
  List<Item> *curr_fields_list= &fields_list;
  bool materialize_join= false;
  uint curr_tmp_table= const_tables;
  TABLE *exec_tmp_table= NULL;
  DBUG_ENTER("JOIN::make_tmp_tables_info");
  having_for_explain= having;

  // 'group' is cleared during tmp-table setup and restored before returning.
  const bool has_group_by= this->group;
  /*
    Setup last table to provide fields and all_fields lists to the next
    node in the plan.
  */
  if (join_tab)
  {
    join_tab[primary_tables - 1].fields= &fields_list;
    join_tab[primary_tables - 1].all_fields= &all_fields;
  }
  /*
    The loose index scan access method guarantees that all grouping or
    duplicate row elimination (for distinct) is already performed
    during data retrieval, and that all MIN/MAX functions are already
    computed for each group. Thus all MIN/MAX functions should be
    treated as regular functions, and there is no need to perform
    grouping in the main execution loop.
    Notice that currently loose index scan is applicable only for
    single table queries, thus it is sufficient to test only the first
    join_tab element of the plan for its access method.
  */
  if (join_tab && join_tab->is_using_loose_index_scan())
    tmp_table_param.precomputed_group_by=
      !join_tab->is_using_agg_loose_index_scan();

  /* Create a tmp table if distinct or if the sort is too complicated */
  if (need_tmp)
  {
    curr_tmp_table= primary_tables;
    tmp_tables++;
    if (plan_is_const())
      first_select= sub_select_op;

    /*
      Create temporary table on first execution of this join.
      (Will be reused if this is a subquery that is executed several times.)
    */
    init_items_ref_array();

    ORDER_with_src tmp_group;
    if (!simple_group && !(test_flags & TEST_NO_KEY_GROUP))
      tmp_group= group_list;

    tmp_table_param.hidden_field_count=
      all_fields.elements - fields_list.elements;

    if (create_intermediate_table(&join_tab[curr_tmp_table],
                                  &all_fields, tmp_group,
                                  group_list && simple_group))
      DBUG_RETURN(true);
    exec_tmp_table= join_tab[curr_tmp_table].table;

    if (exec_tmp_table->distinct)
      optimize_distinct();

    /*
      If there is no sorting or grouping, 'use_order'
      index result should not have been requested.
      Exception: LooseScan strategy for semijoin requires
      sorted access even if final result is not to be sorted.
    */
    DBUG_ASSERT(
      !(ordered_index_usage == ordered_index_void &&
        !plan_is_const() &&
        join_tab[const_tables].position->sj_strategy != SJ_OPT_LOOSE_SCAN &&
        join_tab[const_tables].use_order()));

    /* Change sum_fields reference to calculated fields in tmp_table */
    DBUG_ASSERT(items1.is_null());
    items1= ref_ptr_array_slice(2);
    if (sort_and_group || join_tab[curr_tmp_table].table->group ||
        tmp_table_param.precomputed_group_by)
    {
      if (change_to_use_tmp_fields(thd, items1,
                                   tmp_fields_list1, tmp_all_fields1,
                                   fields_list.elements, all_fields))
	DBUG_RETURN(true);
    }
    else
    {
      if (change_refs_to_tmp_fields(thd, items1,
                                    tmp_fields_list1, tmp_all_fields1,
                                    fields_list.elements, all_fields))
	DBUG_RETURN(true);
    }
    curr_all_fields= &tmp_all_fields1;
    curr_fields_list= &tmp_fields_list1;
    // Need to set them now for correct group_fields setup, reset at the end.
    set_items_ref_array(items1);
    join_tab[curr_tmp_table].ref_array= &items1;
    join_tab[curr_tmp_table].all_fields= &tmp_all_fields1;
    join_tab[curr_tmp_table].fields= &tmp_fields_list1;
    setup_tmptable_write_func(&join_tab[curr_tmp_table]);

    /*
      If having is not handled here, it will be checked before the row is sent
      to the client.
    */
    if (having &&
        (sort_and_group || (exec_tmp_table->distinct && !group_list)))
    {
      /*
        If there is no select distinct then move the having to table conds of
        tmp table.
        NOTE : We cannot apply having after distinct. If columns of having are
               not part of select distinct, then distinct may remove rows
               which can satisfy having.
      */
      if (!select_distinct && add_having_as_tmp_table_cond(curr_tmp_table))
        DBUG_RETURN(true);

      /*
        Having condition which we are not able to add as tmp table conds are
        kept as before. And, this will be applied before storing the rows in
        tmp table.
      */
      join_tab[curr_tmp_table].having= having;
      having= NULL; // Already done
    }

    tmp_table_param.func_count= 0;
    /*
      NOTE(review): func_count is zeroed on the line above, which makes the
      following addition a no-op. Historically field_count was increased by
      func_count *before* the reset; verify whether the intended order of
      these two statements has been swapped.
    */
    tmp_table_param.field_count+= tmp_table_param.func_count;
    if (sort_and_group || join_tab[curr_tmp_table].table->group)
    {
      tmp_table_param.field_count+= tmp_table_param.sum_func_count;
      tmp_table_param.sum_func_count= 0;
    }

    if (exec_tmp_table->group)
    {						// Already grouped
      // The tmp table produces grouped rows; sorting by group gives ORDER BY.
      if (!order && !no_order && !skip_sort_order)
        order= group_list;  /* order by group */
      group_list= NULL;
    }
    /*
      If we have different sort & group then we must sort the data by group
      and copy it to another tmp table
      This code is also used if we are using distinct something
      we haven't been able to store in the temporary table yet
      like SEC_TO_TIME(SUM(...)).
    */

    if ((group_list &&
         (!test_if_subpart(group_list, order) || select_distinct)) ||
        (select_distinct && tmp_table_param.using_outer_summary_function))
    {					/* Must copy to another table */
      DBUG_PRINT("info",("Creating group table"));

      calc_group_buffer(this, group_list);
      count_field_types(select_lex, &tmp_table_param, tmp_all_fields1,
                        select_distinct && !group_list);
      tmp_table_param.hidden_field_count=
        tmp_all_fields1.elements - tmp_fields_list1.elements;
      sort_and_group= false;
      if (!exec_tmp_table->group && !exec_tmp_table->distinct)
      {
        // 1st tmp table were materializing join result
        materialize_join= true;
        explain_flags.set(ESC_BUFFER_RESULT, ESP_USING_TMPTABLE);
      }
      curr_tmp_table++;
      tmp_tables++;

      /* group data to new table */
      /*
        If the access method is loose index scan then all MIN/MAX
        functions are precomputed, and should be treated as regular
        functions. See extended comment above.
      */
      if (join_tab->is_using_loose_index_scan())
        tmp_table_param.precomputed_group_by= TRUE;

      tmp_table_param.hidden_field_count=
        curr_all_fields->elements - curr_fields_list->elements;
      ORDER_with_src dummy= NULL; //TODO can use table->group here also

      if (create_intermediate_table(&join_tab[curr_tmp_table],
                                    curr_all_fields, dummy, true))
	DBUG_RETURN(true);

      if (group_list)
      {
        explain_flags.set(group_list.src, ESP_USING_TMPTABLE);
        if (!plan_is_const())        // No need to sort a single row
        {
          // Sort the first tmp table by the grouping columns.
          JOIN_TAB *sort_tab= &join_tab[curr_tmp_table - 1];
          if (add_sorting_to_table(sort_tab, &group_list))
            DBUG_RETURN(true);
        }

        if (make_group_fields(this, this))
          DBUG_RETURN(true);
      }

      /*
        If there is no sorting or grouping, 'use_order'
        index result should not have been requested.
      */
      DBUG_ASSERT(!(ordered_index_usage == ordered_index_void &&
                    !plan_is_const() &&
                    join_tab[const_tables].use_order()));

      // Setup sum funcs only when necessary, otherwise we might break info
      // for the first table
      if (group_list || tmp_table_param.sum_func_count)
      {
        if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true, true))
          DBUG_RETURN(true);
        if (prepare_sum_aggregators(sum_funcs,
                                    !join_tab->is_using_agg_loose_index_scan()))
          DBUG_RETURN(true);
        group_list= NULL;
        if (setup_sum_funcs(thd, sum_funcs))
          DBUG_RETURN(true);
      }
      // No sum funcs anymore
      DBUG_ASSERT(items2.is_null());

      items2= ref_ptr_array_slice(3);
      if (change_to_use_tmp_fields(thd, items2,
                                   tmp_fields_list2, tmp_all_fields2,
                                   fields_list.elements, tmp_all_fields1))
        DBUG_RETURN(true);

      curr_fields_list= &tmp_fields_list2;
      curr_all_fields= &tmp_all_fields2;
      set_items_ref_array(items2);
      join_tab[curr_tmp_table].ref_array= &items2;
      join_tab[curr_tmp_table].all_fields= &tmp_all_fields2;
      join_tab[curr_tmp_table].fields= &tmp_fields_list2;
      setup_tmptable_write_func(&join_tab[curr_tmp_table]);

      tmp_table_param.field_count+= tmp_table_param.sum_func_count;
      tmp_table_param.sum_func_count= 0;
    }
    if (join_tab[curr_tmp_table].table->distinct)
      select_distinct= false;               /* Each row is unique */

    if (select_distinct && !group_list)
    {
      // Duplicate removal will be done by the tmp table itself.
      if (having)
      {
        join_tab[curr_tmp_table].having= having;
        having->update_used_tables();
      }
      join_tab[curr_tmp_table].distinct= true;
      explain_flags.set(ESC_DISTINCT, ESP_DUPS_REMOVAL);
      having= NULL;
      select_distinct= false;
    }
    /* Clean tmp_table_param for the next tmp table. */
    tmp_table_param.field_count= tmp_table_param.sum_func_count=
      tmp_table_param.func_count= 0;

    tmp_table_param.copy_field= tmp_table_param.copy_field_end=0;
    first_record= sort_and_group=0;

    if (!group_optimized_away)
    {
      group= false;
    }
    else
    {
      /*
        If grouping has been optimized away, a temporary table is
        normally not needed unless we're explicitly requested to create
        one (e.g. due to a SQL_BUFFER_RESULT hint or INSERT ... SELECT).

        In this case (grouping was optimized away), temp_table was
        created without a grouping expression and JOIN::exec() will not
        perform the necessary grouping (by the use of end_send_group()
        or end_write_group()) if JOIN::group is set to false.
      */
      // the temporary table was explicitly requested
      DBUG_ASSERT(MY_TEST(select_options & OPTION_BUFFER_RESULT));
      // the temporary table does not have a grouping expression
      DBUG_ASSERT(!join_tab[curr_tmp_table].table->group);
    }
    calc_group_buffer(this, group_list);
    count_field_types(select_lex, &tmp_table_param, *curr_all_fields, false);
  }

  if (group || implicit_grouping || tmp_table_param.sum_func_count)
  {
    // Prepare the final aggregation stage (slice 4 of the ref arrays).
    if (make_group_fields(this, this))
      DBUG_RETURN(true);

    DBUG_ASSERT(items3.is_null());

    if (items0.is_null())
      init_items_ref_array();
    items3= ref_ptr_array_slice(4);
    setup_copy_fields(thd, &tmp_table_param,
                      items3, tmp_fields_list3, tmp_all_fields3,
                      curr_fields_list->elements, *curr_all_fields);

    curr_fields_list= &tmp_fields_list3;
    curr_all_fields= &tmp_all_fields3;
    set_items_ref_array(items3);
    if (join_tab)
    {
      // Set grouped fields on the last table
      join_tab[primary_tables + tmp_tables - 1].ref_array= &items3;
      join_tab[primary_tables + tmp_tables - 1].all_fields= &tmp_all_fields3;
      join_tab[primary_tables + tmp_tables - 1].fields= &tmp_fields_list3;
    }
    if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true, true))
      DBUG_RETURN(true);
    if (prepare_sum_aggregators(sum_funcs,
                                !join_tab ||
                                !join_tab-> is_using_agg_loose_index_scan()))
      DBUG_RETURN(true);
    if (setup_sum_funcs(thd, sum_funcs) || thd->is_fatal_error)
      DBUG_RETURN(true);
  }
  if (group_list || order)
  {
    DBUG_PRINT("info",("Sorting for send_result_set_metadata"));
    THD_STAGE_INFO(thd, stage_sorting_result);
    /* If we have already done the group, add HAVING to sorted table */
    if (having && !group_list && !sort_and_group)
    {
      if (add_having_as_tmp_table_cond(curr_tmp_table))
        DBUG_RETURN(true);
    }

    if (group)
      m_select_limit= HA_POS_ERROR;
    else if (!need_tmp)
    {
      /*
        We can abort sorting after thd->select_limit rows if there are no
        filter conditions for any tables after the sorted one.
        Filter conditions come in several forms:
         1. as a condition item attached to the join_tab, or
         2. as a keyuse attached to the join_tab (ref access).
      */
      for (uint i= const_tables + 1; i < primary_tables; i++)
      {
        JOIN_TAB *const tab= join_tab + i;
        if (tab->condition() ||                                // 1
            (tab->keyuse && !tab->first_inner))                // 2
        {
          /* We have to sort all rows */
          m_select_limit= HA_POS_ERROR;
          break;
        }
      }
    }
    /*
      Here we add sorting stage for ORDER BY/GROUP BY clause, if the
      optimiser chose FILESORT to be faster than INDEX SCAN or there is
      no suitable index present.
      OPTION_FOUND_ROWS supersedes LIMIT and is taken into account.
    */
    DBUG_PRINT("info",("Sorting for order by/group by"));
    ORDER_with_src order_arg= group_list ? group_list : order;
    if (join_tab &&
        ordered_index_usage !=
        (group_list ? ordered_index_group_by : ordered_index_order_by) &&
        join_tab[curr_tmp_table].type != JT_CONST &&
        join_tab[curr_tmp_table].type != JT_EQ_REF) // Don't sort 1 row
    {
      // Sort either first non-const table or the last tmp table
      JOIN_TAB *sort_tab= &join_tab[curr_tmp_table];
      if (need_tmp && !materialize_join && !exec_tmp_table->group)
        explain_flags.set(order_arg.src, ESP_USING_TMPTABLE);

      if (add_sorting_to_table(sort_tab, &order_arg))
        DBUG_RETURN(true);
      /*
        filesort_limit:	 Return only this many rows from filesort().
        We can use select_limit_cnt only if we have no group_by and 1 table.
        This allows us to use Bounded_queue for queries like:
          "select SQL_CALC_FOUND_ROWS * from t1 order by b desc limit 1;"
        m_select_limit == HA_POS_ERROR (we need a full table scan)
        unit->select_limit_cnt == 1 (we only need one row in the result set)
      */
      sort_tab->filesort->limit=
        (has_group_by || (primary_tables > curr_tmp_table + 1)) ?
         m_select_limit : unit->select_limit_cnt;
    }
    if (!plan_is_const() &&
        !join_tab[const_tables].table->sort.io_cache)
    {
      /*
        If no IO cache exists for the first table then we are using an
        INDEX SCAN and no filesort. Thus we should not remove the sorted
        attribute on the INDEX SCAN.
      */
      skip_sort_order= true;
    }
  }
  fields= curr_fields_list;
  // Reset before execution
  set_items_ref_array(items0);
  if (join_tab)
    join_tab[primary_tables + tmp_tables - 1].next_select=
      setup_end_select_func(this, NULL);
  // Restore the grouping flag saved at the top of this function.
  group= has_group_by;

  DBUG_RETURN(false);
}
5469
5470
5471 /**
5472 @brief Add Filesort object to the given table to sort if with filesort
5473
5474 @param tab the JOIN_TAB object to attach created Filesort object to
5475 @param order List of expressions to sort the table by
5476
5477 @note This function moves tab->select, if any, to filesort->select
5478
5479 @return false on success, true on OOM
5480 */
5481
5482 bool
add_sorting_to_table(JOIN_TAB * tab,ORDER_with_src * order)5483 JOIN::add_sorting_to_table(JOIN_TAB *tab, ORDER_with_src *order)
5484 {
5485 explain_flags.set(order->src, ESP_USING_FILESORT);
5486 tab->filesort= new (thd->mem_root) Filesort(*order, HA_POS_ERROR, tab->select);
5487 if (!tab->filesort)
5488 return true;
5489 /*
5490 Select was moved to filesort->select to force join_init_read_record to use
5491 sorted result instead of reading table through select.
5492 */
5493 if (tab->select)
5494 {
5495 tab->select= NULL;
5496 tab->set_condition(NULL, __LINE__);
5497 }
5498 tab->read_first_record= join_init_read_record;
5499 return false;
5500 }
5501
5502 /**
5503 Find a cheaper access key than a given @a key
5504
5505 @param tab NULL or JOIN_TAB of the accessed table
5506 @param order Linked list of ORDER BY arguments
5507 @param table Table if tab == NULL or tab->table
5508 @param usable_keys Key map to find a cheaper key in
5509 @param ref_key
5510 * 0 <= key < MAX_KEY - key number (hint) to start the search
5511 * -1 - no key number provided
5512 @param select_limit LIMIT value, or HA_POS_ERROR if no limit
5513 @param [out] new_key Key number if success, otherwise undefined
5514 @param [out] new_key_direction Return -1 (reverse) or +1 if success,
5515 otherwise undefined
5516 @param [out] new_select_limit Return adjusted LIMIT
5517 @param [out] new_used_key_parts NULL by default, otherwise return number
5518 of new_key prefix columns if success
5519 or undefined if the function fails
5520 @param [out] saved_best_key_parts NULL by default, otherwise preserve the
5521 value for further use in QUICK_SELECT_DESC
5522
5523 @note
5524 This function takes into account table->quick_condition_rows statistic
5525 (that is calculated by the make_join_statistics function).
5526 However, single table procedures such as mysql_update() and mysql_delete()
5527 never call make_join_statistics, so they have to update it manually
5528 (@see get_index_for_order()).
5529 */
5530
static bool
test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
                         key_map usable_keys,  int ref_key,
                         ha_rows select_limit,
                         int *new_key, int *new_key_direction,
                         ha_rows *new_select_limit, uint *new_used_key_parts,
                         uint *saved_best_key_parts)
{
  DBUG_ENTER("test_if_cheaper_ordering");
  /*
    Check whether there is an index compatible with the given order
    usage of which is cheaper than usage of the ref_key index (ref_key>=0)
    or a table scan.
    It may be the case if ORDER/GROUP BY is used with LIMIT.
  */
  ha_rows best_select_limit= HA_POS_ERROR;
  JOIN *join= tab ? tab->join : NULL;
  uint nr;
  key_map keys;
  uint best_key_parts= 0;
  int best_key_direction= 0;
  ha_rows best_records= 0;
  double read_time;                 // cost of the current (ref_key/scan) plan
  int best_key= -1;
  bool is_best_covering= FALSE;
  double fanout= 1;                 // rows produced per row of 'table' by later joins
  ha_rows table_records= table->file->stats.records;
  bool group= join && join->group && order == join->group_list;
  // Estimated #rows matched by the current access method; refined below.
  ha_rows refkey_rows_estimate= table->quick_condition_rows;
  const bool has_limit= (select_limit != HA_POS_ERROR);

  /*
    If not used with LIMIT, only use keys if the whole query can be
    resolved with a key;  This is because filesort() is usually faster than
    retrieving all rows through an index.
  */
  if (select_limit >= table_records)
  {
    keys= *table->file->keys_to_use_for_scanning();
    keys.merge(table->covering_keys);

    /*
      We are adding here also the index specified in FORCE INDEX clause,
      if any.
      This is to allow users to use index in ORDER BY.
    */
    if (table->force_index)
      keys.merge(group ? table->keys_in_use_for_group_by :
                         table->keys_in_use_for_order_by);
    keys.intersect(usable_keys);
  }
  else
    keys= usable_keys;

  if (join)
  {
    read_time= tab->position->read_time;
    for (const JOIN_TAB *jt= tab + 1;
         jt < join->join_tab + join->primary_tables; jt++)
      fanout*= jt->position->records_read; // fanout is always >= 1
  }
  else
    read_time= table->file->scan_time();

  /*
    Calculate the selectivity of the ref_key for REF_ACCESS. For
    RANGE_ACCESS we use table->quick_condition_rows.
  */
  if (ref_key >= 0 && tab->type == JT_REF)
  {
    if (table->quick_keys.is_set(ref_key))
      refkey_rows_estimate= table->quick_rows[ref_key];
    else
    {
      // No range estimate: fall back to index statistics for the used prefix.
      const KEY *ref_keyinfo= table->key_info + ref_key;
      refkey_rows_estimate= ref_keyinfo->rec_per_key[tab->ref.key_parts - 1];
    }
    set_if_bigger(refkey_rows_estimate, 1);
  }
  // Evaluate every candidate index that can deliver rows in the wanted order.
  for (nr=0; nr < table->s->keys ; nr++)
  {
    int direction;
    uint used_key_parts;

    if (keys.is_set(nr) &&
        (direction= test_if_order_by_key(order, table, nr, &used_key_parts)))
    {
      /*
        At this point we are sure that ref_key is a non-ordering
        key (where "ordering key" is a key that will return rows
        in the order required by ORDER BY).
      */
      DBUG_ASSERT (ref_key != (int) nr);

      bool is_covering= table->covering_keys.is_set(nr) ||
                        (nr == table->s->primary_key &&
                        table->file->primary_key_is_clustered()) ||
                        (table->file->index_flags(nr, 0, 0)
                         & HA_CLUSTERED_INDEX);

      /*
        Don't use an index scan with ORDER BY without limit.
        For GROUP BY without limit always use index scan
        if there is a suitable index.
        Why we hold to this asymmetry hardly can be explained
        rationally. It's easy to demonstrate that using
        temporary table + filesort could be cheaper for grouping
        queries too.
      */
      if (is_covering ||
          select_limit != HA_POS_ERROR ||
          (ref_key < 0 && (group || table->force_index)))
      {
        double rec_per_key;
        double index_scan_time;
        KEY *keyinfo= table->key_info+nr;
        if (select_limit == HA_POS_ERROR)
          select_limit= table_records;
        if (group)
        {
          /*
            Used_key_parts can be larger than keyinfo->key_parts
            when using a secondary index clustered with a primary
            key (e.g. as in Innodb).
            See Bug #28591 for details.
          */
          rec_per_key= used_key_parts &&
                       used_key_parts <= actual_key_parts(keyinfo) ?
                       keyinfo->rec_per_key[used_key_parts-1] : 1;
          set_if_bigger(rec_per_key, 1);
          /*
            With a grouping query each group containing on average
            rec_per_key records produces only one row that will
            be included into the result set.
          */
          if (select_limit > table_records/rec_per_key)
            select_limit= table_records;
          else
            select_limit= (ha_rows) (select_limit*rec_per_key);
        }
        /*
          If tab=tk is not the last joined table tn then to get first
          L records from the result set we can expect to retrieve
          only L/fanout(tk,tn) where fanout(tk,tn) says how many
          rows in the record set on average will match each row tk.
          Usually our estimates for fanouts are too pessimistic.
          So the estimate for L/fanout(tk,tn) will be too optimistic
          and as result we'll choose an index scan when using ref/range
          access + filesort will be cheaper.
        */
        select_limit= (ha_rows) (select_limit < fanout ?
                                 1 : select_limit/fanout);
        /*
          We assume that each of the tested indexes is not correlated
          with ref_key. Thus, to select first N records we have to scan
          N/selectivity(ref_key) index entries.
          selectivity(ref_key) = #scanned_records/#table_records =
          refkey_rows_estimate/table_records.
          In any case we can't select more than #table_records.
          N/(refkey_rows_estimate/table_records) > table_records
          <=> N > refkey_rows_estimate.
         */
        if (select_limit > refkey_rows_estimate)
          select_limit= table_records;
        else
          select_limit= (ha_rows) (select_limit *
                                   (double) table_records /
                                    refkey_rows_estimate);
        rec_per_key= keyinfo->rec_per_key[keyinfo->user_defined_key_parts - 1];
        set_if_bigger(rec_per_key, 1);
        /*
          Here we take into account the fact that rows are
          accessed in sequences rec_per_key records in each.
          Rows in such a sequence are supposed to be ordered
          by rowid/primary key. When reading the data
          in a sequence we'll touch not more pages than the
          table file contains.
          TODO. Use the formula for a disk sweep sequential access
          to calculate the cost of accessing data rows for one
          index entry.
        */
        index_scan_time= select_limit/rec_per_key *
                         min(rec_per_key, table->file->scan_time());
        if ((ref_key < 0 && is_covering) ||
            (ref_key < 0 && (group || table->force_index)) ||
            index_scan_time < read_time)
        {
          ha_rows quick_records= table_records;
          ha_rows refkey_select_limit= (ref_key >= 0 &&
                                        table->covering_keys.is_set(ref_key)) ?
                                        refkey_rows_estimate :
                                        HA_POS_ERROR;
          // Prefer covering indexes; break ties on fewest estimated rows.
          if ((is_best_covering && !is_covering) ||
              (is_covering && refkey_select_limit < select_limit))
            continue;
          if (table->quick_keys.is_set(nr))
            quick_records= table->quick_rows[nr];
          if (best_key < 0 ||
              (select_limit <= min(quick_records,best_records) ?
               keyinfo->user_defined_key_parts < best_key_parts :
               quick_records < best_records) ||
              ((quick_records == best_records) &&
               !is_best_covering && is_covering))
          {
            best_key= nr;
            best_key_parts= keyinfo->user_defined_key_parts;
            if (saved_best_key_parts)
              *saved_best_key_parts= used_key_parts;
            best_records= quick_records;
            is_best_covering= is_covering;
            best_key_direction= direction;
            best_select_limit= select_limit;
          }
        }
      }
    }
  }

  if (best_key < 0 || best_key == ref_key)
    DBUG_RETURN(FALSE);

  *new_key= best_key;
  *new_key_direction= best_key_direction;
  // Without a LIMIT the whole table must be read regardless of the estimate.
  *new_select_limit= has_limit ? best_select_limit : table_records;
  if (new_used_key_parts != NULL)
    *new_used_key_parts= best_key_parts;

  DBUG_RETURN(TRUE);
}
5760
5761
5762 /**
5763 Find a key to apply single table UPDATE/DELETE by a given ORDER
5764
5765 @param order Linked list of ORDER BY arguments
5766 @param table Table to find a key
5767 @param select Pointer to access/update select->quick (if any)
5768 @param limit LIMIT clause parameter
5769 @param [out] need_sort TRUE if filesort needed
5770 @param [out] reverse
5771 TRUE if the key is reversed again given ORDER (undefined if key == MAX_KEY)
5772
5773 @return
5774 - MAX_KEY if no key found (need_sort == TRUE)
5775 - MAX_KEY if quick select result order is OK (need_sort == FALSE)
5776 - key number (either index scan or quick select) (need_sort == FALSE)
5777
5778 @note
5779 Side effects:
5780 - may deallocate or deallocate and replace select->quick;
5781 - may set table->quick_condition_rows and table->quick_rows[...]
5782 to table->file->stats.records.
5783 */
5784
get_index_for_order(ORDER * order,TABLE * table,SQL_SELECT * select,ha_rows limit,bool * need_sort,bool * reverse)5785 uint get_index_for_order(ORDER *order, TABLE *table, SQL_SELECT *select,
5786 ha_rows limit, bool *need_sort, bool *reverse)
5787 {
5788 if (select && select->quick && select->quick->unique_key_range())
5789 { // Single row select (always "ordered"): Ok to use with key field UPDATE
5790 *need_sort= FALSE;
5791 /*
5792 Returning of MAX_KEY here prevents updating of used_key_is_modified
5793 in mysql_update(). Use quick select "as is".
5794 */
5795 return MAX_KEY;
5796 }
5797
5798 if (!order)
5799 {
5800 *need_sort= FALSE;
5801 if (select && select->quick)
5802 return select->quick->index; // index or MAX_KEY, use quick select as is
5803 else
5804 return table->file->key_used_on_scan; // MAX_KEY or index for some engines
5805 }
5806
5807 if (!is_simple_order(order)) // just to cut further expensive checks
5808 {
5809 *need_sort= TRUE;
5810 return MAX_KEY;
5811 }
5812
5813 if (select && select->quick)
5814 {
5815 if (select->quick->index == MAX_KEY)
5816 {
5817 *need_sort= TRUE;
5818 return MAX_KEY;
5819 }
5820
5821 uint used_key_parts;
5822 switch (test_if_order_by_key(order, table, select->quick->index,
5823 &used_key_parts)) {
5824 case 1: // desired order
5825 *need_sort= FALSE;
5826 return select->quick->index;
5827 case 0: // unacceptable order
5828 *need_sort= TRUE;
5829 return MAX_KEY;
5830 case -1: // desired order, but opposite direction
5831 {
5832 QUICK_SELECT_I *reverse_quick;
5833 if ((reverse_quick=
5834 select->quick->make_reverse(used_key_parts)))
5835 {
5836 select->set_quick(reverse_quick);
5837 *need_sort= FALSE;
5838 return select->quick->index;
5839 }
5840 else
5841 {
5842 *need_sort= TRUE;
5843 return MAX_KEY;
5844 }
5845 }
5846 }
5847 DBUG_ASSERT(0);
5848 }
5849 else if (limit != HA_POS_ERROR)
5850 { // check if some index scan & LIMIT is more efficient than filesort
5851
5852 /*
5853 Update quick_condition_rows since single table UPDATE/DELETE procedures
5854 don't call make_join_statistics() and leave this variable uninitialized.
5855 */
5856 table->quick_condition_rows= table->file->stats.records;
5857
5858 int key, direction;
5859 if (test_if_cheaper_ordering(NULL, order, table,
5860 table->keys_in_use_for_order_by, -1,
5861 limit,
5862 &key, &direction, &limit) &&
5863 !is_key_used(table, key, table->write_set))
5864 {
5865 *need_sort= FALSE;
5866 *reverse= (direction < 0);
5867 return key;
5868 }
5869 }
5870 *need_sort= TRUE;
5871 return MAX_KEY;
5872 }
5873
5874
5875 /**
5876 Returns number of key parts depending on
5877 OPTIMIZER_SWITCH_USE_INDEX_EXTENSIONS flag.
5878
5879 @param key_info pointer to KEY structure
5880
5881 @return number of key parts.
5882 */
5883
actual_key_parts(KEY * key_info)5884 uint actual_key_parts(KEY *key_info)
5885 {
5886 return key_info->table->in_use->
5887 optimizer_switch_flag(OPTIMIZER_SWITCH_USE_INDEX_EXTENSIONS) ?
5888 key_info->actual_key_parts : key_info->user_defined_key_parts;
5889 }
5890
5891
5892 /**
5893 Returns key flags depending on
5894 OPTIMIZER_SWITCH_USE_INDEX_EXTENSIONS flag.
5895
5896 @param key_info pointer to KEY structure
5897
5898 @return key flags.
5899 */
5900
actual_key_flags(KEY * key_info)5901 uint actual_key_flags(KEY *key_info)
5902 {
5903 return key_info->table->in_use->
5904 optimizer_switch_flag(OPTIMIZER_SWITCH_USE_INDEX_EXTENSIONS) ?
5905 key_info->actual_flags : key_info->flags;
5906 }
5907
5908 /**
5909 @} (end of group Query_Optimizer)
5910 */
5911