1 /* Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23 /**
24 @file
25
26 @brief Evaluate query expressions, throughout resolving, optimization and
27 execution.
28
29 @defgroup Query_Optimizer Query Optimizer
30 @{
31 */
32
33 #include "sql_select.h"
34 #include "sql_table.h" // primary_key_name
35 #include "sql_derived.h"
36 #include "probes_mysql.h"
37 #include "opt_trace.h"
38 #include "key.h" // key_copy, key_cmp, key_cmp_if_same
39 #include "lock.h" // mysql_unlock_some_tables,
40 // mysql_unlock_read_tables
41 #include "sql_show.h" // append_identifier
42 #include "sql_base.h"
43 #include "auth_common.h" // *_ACL
44 #include "sql_test.h" // misc. debug printing utilities
45 #include "records.h" // init_read_record, end_read_record
46 #include "filesort.h" // filesort_free_buffers
47 #include "opt_explain.h"
48 #include "sql_join_buffer.h" // JOIN_CACHE
49 #include "sql_optimizer.h" // JOIN
50 #include "sql_tmp_table.h" // tmp tables
51 #include "debug_sync.h" // DEBUG_SYNC
52 #include "item_sum.h" // Item_sum
53 #include "sql_planner.h" // calculate_condition_filter
54 #include "opt_hints.h" // hint_key_state()
55
56 #include <algorithm>
57
58 using std::max;
59 using std::min;
60
// Static display name shared by all store_key_const_item instances.
const char store_key_const_item::static_name[]= "const";

// Forward declarations for helpers defined later in this file.
static store_key *get_store_key(THD *thd,
                                Key_use *keyuse, table_map used_tables,
                                KEY_PART_INFO *key_part, uchar *key_buff,
                                uint maybe_null);
bool const_expression_in_where(Item *conds,Item *item, Item **comp_item);
uint find_shortest_key(TABLE *table, const key_map *usable_keys);
69 /**
70 Handle a data manipulation query, from preparation through cleanup
71
72 @param thd thread handler
73 @param lex query to be processed
74 @param result sink of result of query execution.
75 may be protocol object (for passing result to a client),
76 insert object, update object, delete object, etc.
77 @param added_options additional options for detailed control over execution
78 @param removed_options options that are not applicable for this command
79
80 @returns false if success, true if error
81
82 @details
83 Processing a query goes through 5 phases (parsing is already done)
84 - Preparation
85 - Locking of tables
86 - Optimization
87 - Execution or explain
88 - Cleanup
89 The queries handled by this function are:
90
91 SELECT
92 INSERT ... SELECT
93 REPLACE ... SELECT
94 UPDATE (multi-table)
95 DELETE (multi-table)
96
97 @todo make this function also handle INSERT ... VALUES, single-table
98 UPDATE and DELETE, SET and DO.
99
100 The function processes simple query expressions without UNION and
101 without multi-level ORDER BY/LIMIT separately.
102 Such queries are executed with a more direct code path.
103 */
bool handle_query(THD *thd, LEX *lex, Query_result *result,
                  ulonglong added_options, ulonglong removed_options)
{
  DBUG_ENTER("handle_query");

  SELECT_LEX_UNIT *const unit= lex->unit;
  SELECT_LEX *const select= unit->first_select();
  bool res;

  DBUG_ASSERT(thd == unit->thd);

  // The unit must not already have passed through any processing phase.
  DBUG_ASSERT(!unit->is_prepared() && !unit->is_optimized() &&
              !unit->is_executed());

  // PROCEDURE ANALYSE is valid only with a plain SELECT statement.
  if (lex->proc_analyse && lex->sql_command != SQLCOM_SELECT)
  {
    my_error(ER_WRONG_USAGE, MYF(0), "PROCEDURE", "non-SELECT");
    DBUG_RETURN(true);
  }

  /*
    A "simple" unit (no UNION, no multi-level ORDER BY/LIMIT) is processed
    through the single SELECT_LEX directly; otherwise the whole unit is
    prepared/optimized/executed as one object.
  */
  const bool single_query= unit->is_simple();

  lex->used_tables=0;                         // Updated by setup_fields

  THD_STAGE_INFO(thd, stage_init);

  // Phase 1: preparation (resolving).
  if (single_query)
  {
    unit->set_limit(unit->global_parameters());

    select->context.resolve_in_select_list= true;
    select->set_query_result(result);
    select->make_active_options(added_options, removed_options);
    select->fields_list= select->item_list;

    if (select->prepare(thd))
      goto err;

    unit->set_prepared();
  }
  else
  {
    if (unit->prepare(thd, result, SELECT_NO_UNLOCK | added_options,
                      removed_options))
      goto err;
  }

  DBUG_ASSERT(!lex->is_query_tables_locked());
  /*
    Locking of tables is done after preparation but before optimization.
    This allows to do better partition pruning and avoid locking unused
    partitions. As a consequence, in such a case, prepare stage can rely only
    on metadata about tables used and not data from them.
  */
  if (lock_tables(thd, lex->query_tables, lex->table_count, 0))
    goto err;

  /*
    Register query result in cache.
    Tables must be locked before storing the query in the query cache.
    Transactional engines must be signalled that the statement has started,
    by calling external_lock().
  */
  query_cache.store_query(thd, lex->query_tables);

  // Phase 2: optimization.
  if (single_query)
  {
    if (select->optimize(thd))
      goto err;

    unit->set_optimized();
  }
  else
  {
    if (unit->optimize(thd))
      goto err;
  }

  // Phase 3: either describe the plan (EXPLAIN) or execute it.
  if (lex->is_explain())
  {
    if (explain_query(thd, unit))
      goto err;          /* purecov: inspected */
  }
  else
  {
    if (single_query)
    {
      select->join->exec();
      unit->set_executed();
      if (thd->is_error())
        goto err;
    }
    else
    {
      if (unit->execute(thd))
        goto err;
    }
  }

  DBUG_ASSERT(!thd->is_error());

  thd->update_previous_found_rows();
  THD_STAGE_INFO(thd, stage_end);

  // Do partial cleanup (preserve plans for EXPLAIN).
  res= unit->cleanup(false);

  DBUG_RETURN(res);

err:
  // On error we must still have a diagnostic (or a kill) recorded.
  DBUG_ASSERT(thd->is_error() || thd->killed);
  DBUG_PRINT("info",("report_error: %d", thd->is_error()));
  THD_STAGE_INFO(thd, stage_end);

  (void) unit->cleanup(false);

  // Abort the result set (if it has been prepared).
  result->abort_result_set();

  DBUG_RETURN(thd->is_error());
}
225
226
227 /*****************************************************************************
228 Check fields, find best join, do the select and output fields.
229 All tables must be opened.
230 *****************************************************************************/
231
232 /**
233 @brief Check if two items are compatible wrt. materialization.
234
235 @param outer Expression from outer query
236 @param inner Expression from inner query
237
238 @retval TRUE If subquery types allow materialization.
239 @retval FALSE Otherwise.
240 */
241
types_allow_materialization(Item * outer,Item * inner)242 bool types_allow_materialization(Item *outer, Item *inner)
243
244 {
245 if (outer->result_type() != inner->result_type())
246 return false;
247 switch (outer->result_type()) {
248 case ROW_RESULT:
249 // Materialization of rows nested inside rows is not currently supported.
250 return false;
251 case STRING_RESULT:
252 if (outer->is_temporal_with_date() != inner->is_temporal_with_date())
253 return false;
254 if (!(outer->collation.collation == inner->collation.collation
255 /*&& outer->max_length <= inner->max_length */))
256 return false;
257 /*case INT_RESULT:
258 if (!(outer->unsigned_flag ^ inner->unsigned_flag))
259 return false; */
260 default:
261 ; /* suitable for materialization */
262 }
263 return true;
264 }
265
266
267 /*
268 Check if the table's rowid is included in the temptable
269
270 SYNOPSIS
271 sj_table_is_included()
272 join The join
273 join_tab The table to be checked
274
275 DESCRIPTION
276 SemiJoinDuplicateElimination: check the table's rowid should be included
277 in the temptable. This is so if
278
279 1. The table is not embedded within some semi-join nest
280 2. The has been pulled out of a semi-join nest, or
281
282 3. The table is functionally dependent on some previous table
283
284 [4. This is also true for constant tables that can't be
285 NULL-complemented but this function is not called for such tables]
286
287 RETURN
288 TRUE - Include table's rowid
289 FALSE - Don't
290 */
291
sj_table_is_included(JOIN * join,JOIN_TAB * join_tab)292 static bool sj_table_is_included(JOIN *join, JOIN_TAB *join_tab)
293 {
294 if (join_tab->emb_sj_nest)
295 return FALSE;
296
297 /* Check if this table is functionally dependent on the tables that
298 are within the same outer join nest
299 */
300 TABLE_LIST *embedding= join_tab->table_ref->embedding;
301 if (join_tab->type() == JT_EQ_REF)
302 {
303 table_map depends_on= 0;
304 uint idx;
305
306 for (uint kp= 0; kp < join_tab->ref().key_parts; kp++)
307 depends_on |= join_tab->ref().items[kp]->used_tables();
308
309 Table_map_iterator it(depends_on & ~PSEUDO_TABLE_BITS);
310 while ((idx= it.next_bit())!=Table_map_iterator::BITMAP_END)
311 {
312 JOIN_TAB *ref_tab= join->map2table[idx];
313 if (embedding != ref_tab->table_ref->embedding)
314 return TRUE;
315 }
316 /* Ok, functionally dependent */
317 return FALSE;
318 }
319 /* Not functionally dependent => need to include*/
320 return TRUE;
321 }
322
323
324 /**
325 Setup the strategies to eliminate semi-join duplicates.
326
327 @param join Join to process
328 @param no_jbuf_after Do not use join buffering after the table with this
329 number
330
331 @retval FALSE OK
332 @retval TRUE Out of memory error
333
334 @details
335 Setup the strategies to eliminate semi-join duplicates.
336 At the moment there are 5 strategies:
337
338 1. DuplicateWeedout (use of temptable to remove duplicates based on rowids
339 of row combinations)
340 2. FirstMatch (pick only the 1st matching row combination of inner tables)
341 3. LooseScan (scanning the sj-inner table in a way that groups duplicates
342 together and picking the 1st one)
343 4. MaterializeLookup (Materialize inner tables, then setup a scan over
344 outer correlated tables, lookup in materialized table)
345 5. MaterializeScan (Materialize inner tables, then setup a scan over
346 materialized tables, perform lookup in outer tables)
347
348 The join order has "duplicate-generating ranges", and every range is
349 served by one strategy or a combination of FirstMatch with with some
350 other strategy.
351
352 "Duplicate-generating range" is defined as a range within the join order
353 that contains all of the inner tables of a semi-join. All ranges must be
354 disjoint, if tables of several semi-joins are interleaved, then the ranges
355 are joined together, which is equivalent to converting
356 SELECT ... WHERE oe1 IN (SELECT ie1 ...) AND oe2 IN (SELECT ie2 )
357 to
358 SELECT ... WHERE (oe1, oe2) IN (SELECT ie1, ie2 ... ...)
359 .
360
361 Applicability conditions are as follows:
362
363 DuplicateWeedout strategy
364 ~~~~~~~~~~~~~~~~~~~~~~~~~
365
366 (ot|nt)* [ it ((it|ot|nt)* (it|ot))] (nt)*
367 +------+ +=========================+ +---+
368 (1) (2) (3)
369
370 (1) - Prefix of OuterTables (those that participate in
371 IN-equality and/or are correlated with subquery) and outer
372 Non-correlated tables.
373 (2) - The handled range. The range starts with the first sj-inner
374 table, and covers all sj-inner and outer tables
375 Within the range, Inner, Outer, outer non-correlated tables
376 may follow in any order.
377 (3) - The suffix of outer non-correlated tables.
378
379 FirstMatch strategy
380 ~~~~~~~~~~~~~~~~~~~
381
382 (ot|nt)* [ it ((it|nt)* it) ] (nt)*
383 +------+ +==================+ +---+
384 (1) (2) (3)
385
386 (1) - Prefix of outer correlated and non-correlated tables
387 (2) - The handled range, which may contain only inner and
388 non-correlated tables.
389 (3) - The suffix of outer non-correlated tables.
390
391 LooseScan strategy
392 ~~~~~~~~~~~~~~~~~~
393
394 (ot|ct|nt) [ loosescan_tbl (ot|nt|it)* it ] (ot|nt)*
395 +--------+ +===========+ +=============+ +------+
396 (1) (2) (3) (4)
397
398 (1) - Prefix that may contain any outer tables. The prefix must contain
399 all the non-trivially correlated outer tables. (non-trivially means
400 that the correlation is not just through the IN-equality).
401
402 (2) - Inner table for which the LooseScan scan is performed.
403 Notice that special requirements for existence of certain indexes
404 apply to this table, @see class Loose_scan_opt.
405
406 (3) - The remainder of the duplicate-generating range. It is served by
407 application of FirstMatch strategy. Outer IN-correlated tables
408 must be correlated to the LooseScan table but not to the inner
409 tables in this range. (Currently, there can be no outer tables
410 in this range because of implementation restrictions,
411 @see Optimize_table_order::advance_sj_state()).
412
413 (4) - The suffix of outer correlated and non-correlated tables.
414
415 MaterializeLookup strategy
416 ~~~~~~~~~~~~~~~~~~~~~~~~~~
417
418 (ot|nt)* [ it (it)* ] (nt)*
419 +------+ +==========+ +---+
420 (1) (2) (3)
421
422 (1) - Prefix of outer correlated and non-correlated tables.
423
424 (2) - The handled range, which may contain only inner tables.
425 The inner tables are materialized in a temporary table that is
426 later used as a lookup structure for the outer correlated tables.
427
428 (3) - The suffix of outer non-correlated tables.
429
430 MaterializeScan strategy
431 ~~~~~~~~~~~~~~~~~~~~~~~~~~
432
433 (ot|nt)* [ it (it)* ] (ot|nt)*
434 +------+ +==========+ +-----+
435 (1) (2) (3)
436
437 (1) - Prefix of outer correlated and non-correlated tables.
438
439 (2) - The handled range, which may contain only inner tables.
440 The inner tables are materialized in a temporary table which is
441 later used to setup a scan.
442
443 (3) - The suffix of outer correlated and non-correlated tables.
444
445 Note that MaterializeLookup and MaterializeScan has overlap in their patterns.
446 It may be possible to consolidate the materialization strategies into one.
447
448 The choice between the strategies is made by the join optimizer (see
449 advance_sj_state() and fix_semijoin_strategies()).
450 This function sets up all fields/structures/etc needed for execution except
451 for setup/initialization of semi-join materialization which is done in
452 setup_materialized_table().
453 */
454
static bool setup_semijoin_dups_elimination(JOIN *join, uint no_jbuf_after)
{
  uint tableno;
  THD *thd= join->thd;
  DBUG_ENTER("setup_semijoin_dups_elimination");
  ASSERT_BEST_REF_IN_JOIN_ORDER(join);

  // No semi-join nests => nothing to set up.
  if (join->select_lex->sj_nests.is_empty())
    DBUG_RETURN(FALSE);

  QEP_TAB *const qep_array= join->qep_tab;
  /*
    Walk the non-const primary tables; each iteration handles one
    duplicate-generating range and advances tableno past it.
  */
  for (tableno= join->const_tables; tableno < join->primary_tables; )
  {
#ifndef DBUG_OFF
    const bool tab_in_sj_nest= join->best_ref[tableno]->emb_sj_nest != NULL;
#endif
    QEP_TAB *const tab= &qep_array[tableno];
    POSITION *const pos= tab->position();

    if (pos->sj_strategy == SJ_OPT_NONE)
    {
      tableno++;  // nothing to do
      continue;
    }
    // Last table of the range served by this strategy.
    QEP_TAB *last_sj_tab= tab + pos->n_sj_tables - 1;
    switch (pos->sj_strategy) {
    case SJ_OPT_MATERIALIZE_LOOKUP:
    case SJ_OPT_MATERIALIZE_SCAN:
      DBUG_ASSERT(false); // Should not occur among "primary" tables
      // Do nothing
      tableno+= pos->n_sj_tables;
      break;
    case SJ_OPT_LOOSE_SCAN:
    {
      DBUG_ASSERT(tab_in_sj_nest); // First table must be inner
      /* We jump from the last table to the first one */
      tab->match_tab= last_sj_tab->idx();

      /* For LooseScan, duplicate elimination is based on rows being sorted
         on key. We need to make sure that range select keeps the sorted index
         order. (When using MRR it may not.)

         Note: need_sorted_output() implementations for range select classes
         that do not support sorted output, will trigger an assert. This
         should not happen since LooseScan strategy is only picked if sorted
         output is supported.
      */
      if (tab->quick())
      {
        DBUG_ASSERT(tab->quick()->index == pos->loosescan_key);
        tab->quick()->need_sorted_output();
      }

      const uint keyno= pos->loosescan_key;
      DBUG_ASSERT(tab->keys().is_set(keyno));
      tab->set_index(keyno);

      /* Calculate key length */
      uint keylen= 0;
      for (uint kp= 0; kp < pos->loosescan_parts; kp++)
        keylen+= tab->table()->key_info[keyno].key_part[kp].store_length;
      tab->loosescan_key_len= keylen;

      // With more than one table in the range, the tail is handled as a
      // FirstMatch-style jump back to the LooseScan table.
      if (pos->n_sj_tables > 1)
      {
        last_sj_tab->firstmatch_return= tab->idx();
        last_sj_tab->match_tab= last_sj_tab->idx();
      }
      tableno+= pos->n_sj_tables;
      break;
    }
    case SJ_OPT_DUPS_WEEDOUT:
    {
      DBUG_ASSERT(tab_in_sj_nest); // First table must be inner
      /*
        Consider a semijoin of one outer and one inner table, both
        with two rows. The inner table is assumed to be confluent
        (See sj_opt_materialize_lookup)

        If normal nested loop execution is used, we do not need to
        include semi-join outer table rowids in the duplicate
        weedout temp table since NL guarantees that outer table rows
        are encountered only consecutively and because all rows in
        the temp table are deleted for every new outer table
        combination (example is with a confluent inner table):

          ot1.row1|it1.row1
                 '-> temp table's have_confluent_row == FALSE
                   |-> output ot1.row1
                   '-> set have_confluent_row= TRUE
          ot1.row1|it1.row2
                 |-> temp table's have_confluent_row == TRUE
                 | '-> do not output ot1.row1
                 '-> no more join matches - set have_confluent_row= FALSE
          ot1.row2|it1.row1
                 '-> temp table's have_confluent_row == FALSE
                   |-> output ot1.row2
                   '-> set have_confluent_row= TRUE
          ...

        Note: not having outer table rowids in the temp table and
        then emptying the temp table when a new outer table row
        combinition is encountered is an optimization. Including
        outer table rowids in the temp table is not harmful but
        wastes memory.

        Now consider the join buffering algorithms (BNL/BKA). These
        algorithms join each inner row with outer rows in "reverse"
        order compared to NL. Effectively, this means that outer
        table rows may be encountered multiple times in a
        non-consecutive manner:

          NL:                 BNL/BKA:
          ot1.row1|it1.row1   ot1.row1|it1.row1
          ot1.row1|it1.row2   ot1.row2|it1.row1
          ot1.row2|it1.row1   ot1.row1|it1.row2
          ot1.row2|it1.row2   ot1.row2|it1.row2

        It is clear from the above that there is no place we can
        empty the temp table like we do in NL to avoid storing outer
        table rowids.

        Below we check if join buffering might be used. If so, set
        first_table to the first non-constant table so that outer
        table rowids are included in the temp table. Do not destroy
        other duplicate elimination methods.
      */
      uint first_table= tableno;
      for (uint sj_tableno= tableno;
           sj_tableno < tableno + pos->n_sj_tables;
           sj_tableno++)
      {
        if (join->best_ref[sj_tableno]->use_join_cache() &&
            sj_tableno <= no_jbuf_after)
        {
          /* Join buffering will probably be used */
          first_table= join->const_tables;
          break;
        }
      }

      QEP_TAB *const first_sj_tab= qep_array + first_table;
      if (last_sj_tab->first_inner() != NO_PLAN_IDX &&
          first_sj_tab->first_inner() != last_sj_tab->first_inner())
      {
        /*
          The first duplicate weedout table is an outer table of an outer join
          and the last duplicate weedout table is one of the inner tables of
          the outer join.
          In this case, we must assure that all the inner tables of the
          outer join are part of the duplicate weedout operation.
          This is to assure that NULL-extension for inner tables of an
          outer join is performed before duplicate elimination is performed,
          otherwise we will have extra NULL-extended rows being output, which
          should have been eliminated as duplicates.
        */
        QEP_TAB *tab2= &qep_array[last_sj_tab->first_inner()];
        /*
          First, locate the table that is the first inner table of the
          outer join operation that first_sj_tab is outer for.
        */
        while (tab2->first_upper() != NO_PLAN_IDX &&
               tab2->first_upper() != first_sj_tab->first_inner())
          tab2= qep_array + tab2->first_upper();
        // Then, extend the range with all inner tables of the join nest:
        if (qep_array[tab2->first_inner()].last_inner() > last_sj_tab->idx())
          last_sj_tab= &qep_array[qep_array[tab2->first_inner()].last_inner()];
      }

      SJ_TMP_TABLE::TAB sjtabs[MAX_TABLES];
      SJ_TMP_TABLE::TAB *last_tab= sjtabs;
      uint jt_rowid_offset= 0; // # tuple bytes are already occupied (w/o NULL bytes)
      uint jt_null_bits= 0;    // # null bits in tuple bytes
      /*
        Walk through the range and remember
        - tables that need their rowids to be put into temptable
        - the last outer table
      */
      for (QEP_TAB *tab_in_range= qep_array + first_table;
           tab_in_range <= last_sj_tab;
           tab_in_range++)
      {
        if (sj_table_is_included(join, join->best_ref[tab_in_range->idx()]))
        {
          last_tab->qep_tab= tab_in_range;
          last_tab->rowid_offset= jt_rowid_offset;
          jt_rowid_offset += tab_in_range->table()->file->ref_length;
          if (tab_in_range->table()->is_nullable())
          {
            last_tab->null_byte= jt_null_bits / 8;
            last_tab->null_bit= jt_null_bits++;
          }
          last_tab++;
          tab_in_range->table()->prepare_for_position();
          tab_in_range->keep_current_rowid= true;
        }
      }

      SJ_TMP_TABLE *sjtbl;
      if (jt_rowid_offset) /* Temptable has at least one rowid */
      {
        size_t tabs_size= (last_tab - sjtabs) * sizeof(SJ_TMP_TABLE::TAB);
        if (!(sjtbl= new (thd->mem_root) SJ_TMP_TABLE) ||
            !(sjtbl->tabs= (SJ_TMP_TABLE::TAB*) thd->alloc(tabs_size)))
          DBUG_RETURN(TRUE); /* purecov: inspected */
        memcpy(sjtbl->tabs, sjtabs, tabs_size);
        sjtbl->is_confluent= FALSE;
        sjtbl->tabs_end= sjtbl->tabs + (last_tab - sjtabs);
        sjtbl->rowid_len= jt_rowid_offset;
        sjtbl->null_bits= jt_null_bits;
        sjtbl->null_bytes= (jt_null_bits + 7)/8;
        sjtbl->tmp_table=
          create_duplicate_weedout_tmp_table(thd,
                                             sjtbl->rowid_len +
                                             sjtbl->null_bytes,
                                             sjtbl);
        // A hash field implies the table is accessed through an index.
        if (sjtbl->tmp_table->hash_field)
          sjtbl->tmp_table->file->ha_index_init(0, 0);
        join->sj_tmp_tables.push_back(sjtbl->tmp_table);
      }
      else
      {
        /*
          This is confluent case where the entire subquery predicate does
          not depend on anything at all, ie this is
            WHERE const IN (uncorrelated select)
        */
        if (!(sjtbl= new (thd->mem_root) SJ_TMP_TABLE))
          DBUG_RETURN(TRUE); /* purecov: inspected */
        sjtbl->tmp_table= NULL;
        sjtbl->is_confluent= TRUE;
        sjtbl->have_confluent_row= FALSE;
      }
      // The first table flushes the weedout table; the last one checks it.
      qep_array[first_table].flush_weedout_table= sjtbl;
      last_sj_tab->check_weed_out_table= sjtbl;

      tableno+= pos->n_sj_tables;
      break;
    }
    case SJ_OPT_FIRST_MATCH:
    {
      /*
        Setup a "jump" from the last table in the range of inner tables
        to the last outer table before the inner tables.
        If there are outer tables inbetween the inner tables, we have to
        setup a "split jump": Jump from the last inner table to the last
        outer table within the range, then from the last inner table
        before the outer table(s), jump to the last outer table before
        this range of inner tables, etc.
      */
      plan_idx jump_to= tab->idx() - 1;
      DBUG_ASSERT(tab_in_sj_nest); // First table must be inner
      for (QEP_TAB *tab_in_range= tab;
           tab_in_range <= last_sj_tab;
           tab_in_range++)
      {
        if (!join->best_ref[tab_in_range->idx()]->emb_sj_nest)
        {
          /*
            Let last non-correlated table be jump target for
            subsequent inner tables.
          */
          jump_to= tab_in_range->idx();
        }
        else
        {
          /*
            Assign jump target for last table in a consecutive range of
            inner tables.
          */
          if (tab_in_range == last_sj_tab ||
              !join->best_ref[tab_in_range->idx() + 1]->emb_sj_nest)
          {
            tab_in_range->firstmatch_return= jump_to;
            tab_in_range->match_tab= last_sj_tab->idx();
          }
        }
      }
      tableno+= pos->n_sj_tables;
      break;
    }
    }
  }
  DBUG_RETURN(FALSE);
}
740
741
742 /*
743 Destroy all temporary tables created by NL-semijoin runtime
744 */
745
destroy_sj_tmp_tables(JOIN * join)746 static void destroy_sj_tmp_tables(JOIN *join)
747 {
748 List_iterator<TABLE> it(join->sj_tmp_tables);
749 TABLE *table;
750 while ((table= it++))
751 {
752 /*
753 SJ-Materialization tables are initialized for either sequential reading
754 or index lookup, DuplicateWeedout tables are not initialized for read
755 (we only write to them), so need to call ha_index_or_rnd_end.
756 */
757 table->file->ha_index_or_rnd_end();
758 free_tmp_table(join->thd, table);
759 }
760 join->sj_tmp_tables.empty();
761 }
762
763
764 /**
765 Remove all rows from all temp tables used by NL-semijoin runtime
766
767 @param join The join to remove tables for
768
769 All rows must be removed from all temporary tables before every join
770 re-execution.
771 */
772
clear_sj_tmp_tables(JOIN * join)773 static int clear_sj_tmp_tables(JOIN *join)
774 {
775 int res;
776 List_iterator<TABLE> it(join->sj_tmp_tables);
777 TABLE *table;
778 while ((table= it++))
779 {
780 if ((res= table->file->ha_delete_all_rows()))
781 return res; /* purecov: inspected */
782 }
783 if (join->qep_tab)
784 {
785 Semijoin_mat_exec *sjm;
786 List_iterator<Semijoin_mat_exec> it2(join->sjm_exec_list);
787 while ((sjm= it2++))
788 {
789 {
790 QEP_TAB *const tab= &join->qep_tab[sjm->mat_table_index];
791 /*
792 If zero_result_cause is set, we have not gone through
793 pick_table_access_method() which sets materialize_table, so the
794 assertion is disabled in this case.
795 */
796 DBUG_ASSERT(join->zero_result_cause || tab->materialize_table);
797 tab->materialized= false;
798 // The materialized table must be re-read on next evaluation:
799 tab->table()->status= STATUS_GARBAGE | STATUS_NOT_FOUND;
800 }
801 }
802 }
803 return 0;
804 }
805
806
807 /**
808 Reset the state of this join object so that it is ready for a
809 new execution.
810 */
811
void JOIN::reset()
{
  DBUG_ENTER("JOIN::reset");

  // Nothing to reset if this join has not been executed yet.
  if (!executed)
    DBUG_VOID_RETURN;

  // Re-evaluate OFFSET from the query block (it is re-read each execution).
  unit->offset_limit_cnt= (ha_rows)(select_lex->offset_limit ?
                                    select_lex->offset_limit->val_uint() :
                                    0ULL);

  first_record= false;
  group_sent= false;
  reset_executed();

  // Empty all internal temporary tables so they can be refilled.
  if (tmp_tables)
  {
    for (uint tmp= primary_tables; tmp < primary_tables + tmp_tables; tmp++)
    {
      TABLE *const tmp_table= qep_tab[tmp].table();
      // A table that was never instantiated has nothing to clear.
      if (!tmp_table->is_created())
        continue;
      tmp_table->file->extra(HA_EXTRA_RESET_STATE);
      tmp_table->file->ha_delete_all_rows();
      free_io_cache(tmp_table);
      filesort_free_buffers(tmp_table,0);
    }
  }
  // Also empty the semi-join runtime's temp tables.
  clear_sj_tmp_tables(this);
  // Restore the base slice of the ref item array if another slice is active.
  if (current_ref_ptrs != items0)
  {
    set_items_ref_array(items0);
    set_group_rpa= false;
  }

  /* need to reset ref access state (see join_read_key) */
  if (qep_tab)
  {
    for (uint i= 0; i < tables; i++)
    {
      QEP_TAB *const tab= &qep_tab[i];
      /*
        If qep_tab==NULL, we may still have done ref access (to read a const
        table); const tables will not be re-read in the next execution of this
        subquery, so resetting key_err is not needed.
      */
      tab->ref().key_err= TRUE;
      /*
        If the finished execution used "filesort", it may have reset "quick"
        or "condition" when it didn't need them anymore. Restore them for the
        new execution (the new filesort will need them when it starts).
      */
      tab->restore_quick_optim_and_condition();
    }
  }

  /* Reset of sum functions */
  if (sum_funcs)
  {
    Item_sum *func, **func_ptr= sum_funcs;
    while ((func= *(func_ptr++)))
      func->clear();
  }

  if (select_lex->has_ft_funcs())
  {
    /* TODO: move the code to JOIN::exec */
    (void)init_ftfuncs(thd, select_lex);
  }

  DBUG_VOID_RETURN;
}
884
885
886 /**
887 Prepare join result.
888
889 @details Prepare join result prior to join execution or describing.
890 Instantiate derived tables and get schema tables result if necessary.
891
892 @return
893 TRUE An error during derived or schema tables instantiation.
894 FALSE Ok
895 */
896
prepare_result()897 bool JOIN::prepare_result()
898 {
899 DBUG_ENTER("JOIN::prepare_result");
900
901 error= 0;
902 // Create result tables for materialized views/derived tables
903 if (select_lex->materialized_derived_table_count && !zero_result_cause)
904 {
905 for (TABLE_LIST *tl= select_lex->leaf_tables; tl; tl= tl->next_leaf)
906 {
907 if (tl->is_view_or_derived() && tl->create_derived(thd))
908 goto err; /* purecov: inspected */
909 }
910 }
911
912 if (select_lex->query_result()->prepare2())
913 goto err;
914
915 if ((select_lex->active_options() & OPTION_SCHEMA_TABLE) &&
916 get_schema_tables_result(this, PROCESSED_BY_JOIN_EXEC))
917 goto err;
918
919 DBUG_RETURN(false);
920
921 err:
922 error= 1;
923 DBUG_RETURN(true);
924 }
925
926
927 /**
928 Clean up and destroy join object.
929
930 @return false if previous execution was successful, and true otherwise
931 */
932
bool JOIN::destroy()
{
  DBUG_ENTER("JOIN::destroy");

  cond_equal= 0;

  set_plan_state(NO_PLAN);

  // Clean up execution structures; qep_tab and join_tab are mutually
  // exclusive (only one of them exists at this point).
  if (qep_tab)
  {
    DBUG_ASSERT(!join_tab);
    for (uint i= 0; i < tables; i++)
      qep_tab[i].cleanup();
  }
  if (join_tab || best_ref)
  {
    for (uint i= 0; i < tables; i++)
    {
      JOIN_TAB *const tab= join_tab ? &join_tab[i] : best_ref[i];
      tab->cleanup();
    }
  }

  /*
    We are not using tables anymore
    Unlock all tables. We may be in an INSERT .... SELECT statement.
  */

  // Run Cached_item DTORs!
  group_fields.delete_elements();

  /*
    We can't call delete_elements() on copy_funcs as this will cause
    problems in free_elements() as some of the elements are then deleted.
  */
  tmp_table_param.copy_funcs.empty();
  tmp_table_param.cleanup();
  /* Cleanup items referencing temporary table columns */
  cleanup_item_list(tmp_all_fields1);
  cleanup_item_list(tmp_all_fields3);
  // Free all temp tables created by the NL-semijoin runtime.
  destroy_sj_tmp_tables(this);

  // Delete the semi-join materialization descriptors.
  List_iterator<Semijoin_mat_exec> sjm_list_it(sjm_exec_list);
  Semijoin_mat_exec *sjm;
  while ((sjm= sjm_list_it++))
    delete sjm;
  sjm_exec_list.empty();

  keyuse_array.clear();
  // Return the recorded error state of the previous execution.
  DBUG_RETURN(error);
}
984
985
cleanup_item_list(List<Item> & items) const986 void JOIN::cleanup_item_list(List<Item> &items) const
987 {
988 if (!items.is_empty())
989 {
990 List_iterator_fast<Item> it(items);
991 Item *item;
992 while ((item= it++))
993 item->cleanup();
994 }
995 }
996
997
998 /**
999 Optimize a query block and all inner query expressions
1000
1001 @param thd thread handler
1002 @returns false if success, true if error
1003 */
1004
optimize(THD * thd)1005 bool SELECT_LEX::optimize(THD *thd)
1006 {
1007 DBUG_ENTER("SELECT_LEX::optimize");
1008
1009 DBUG_ASSERT(join == NULL);
1010 JOIN *const join_local= new JOIN(thd, this);
1011 if (!join_local)
1012 DBUG_RETURN(true); /* purecov: inspected */
1013
1014 set_join(join_local);
1015
1016 if (join->optimize())
1017 DBUG_RETURN(true);
1018
1019 for (SELECT_LEX_UNIT *unit= first_inner_unit(); unit; unit= unit->next_unit())
1020 {
1021 // Derived tables and const subqueries are already optimized
1022 if (!unit->is_optimized() && unit->optimize(thd))
1023 DBUG_RETURN(true);
1024 }
1025
1026 DBUG_RETURN(false);
1027 }
1028
1029 /*****************************************************************************
1030 Go through all combinations of not marked tables and find the one
1031 which uses least records
1032 *****************************************************************************/
1033
1034 /**
1035 Find how much space the prevous read not const tables takes in cache.
1036 */
1037
calc_used_field_length(THD * thd,TABLE * table,bool keep_current_rowid,uint * p_used_fields,uint * p_used_fieldlength,uint * p_used_blobs,bool * p_used_null_fields,bool * p_used_uneven_bit_fields)1038 void calc_used_field_length(THD *thd,
1039 TABLE *table,
1040 bool keep_current_rowid,
1041 uint *p_used_fields,
1042 uint *p_used_fieldlength,
1043 uint *p_used_blobs,
1044 bool *p_used_null_fields,
1045 bool *p_used_uneven_bit_fields)
1046 {
1047 uint null_fields,blobs,fields,rec_length;
1048 Field **f_ptr,*field;
1049 uint uneven_bit_fields;
1050 MY_BITMAP *read_set= table->read_set;
1051
1052 uneven_bit_fields= null_fields= blobs= fields= rec_length= 0;
1053 for (f_ptr= table->field ; (field= *f_ptr) ; f_ptr++)
1054 {
1055 if (bitmap_is_set(read_set, field->field_index))
1056 {
1057 uint flags= field->flags;
1058 fields++;
1059 rec_length+= field->pack_length();
1060 if (flags & BLOB_FLAG)
1061 blobs++;
1062 if (!(flags & NOT_NULL_FLAG))
1063 null_fields++;
1064 if (field->type() == MYSQL_TYPE_BIT &&
1065 ((Field_bit*)field)->bit_len)
1066 uneven_bit_fields++;
1067 }
1068 }
1069 if (null_fields || uneven_bit_fields)
1070 rec_length+= (table->s->null_fields + 7) / 8;
1071 if (table->is_nullable())
1072 rec_length+= sizeof(my_bool);
1073 if (blobs)
1074 {
1075 uint blob_length=(uint) (table->file->stats.mean_rec_length-
1076 (table->s->reclength - rec_length));
1077 rec_length+= max<uint>(4U, blob_length);
1078 }
1079
1080 if (keep_current_rowid)
1081 {
1082 rec_length+= table->file->ref_length;
1083 fields++;
1084 }
1085
1086 *p_used_fields= fields;
1087 *p_used_fieldlength= rec_length;
1088 *p_used_blobs= blobs;
1089 *p_used_null_fields= null_fields;
1090 *p_used_uneven_bit_fields= uneven_bit_fields;
1091 }
1092
1093
init_ref_access()1094 bool JOIN::init_ref_access()
1095 {
1096 DBUG_ENTER("JOIN::init_ref_access");
1097 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
1098
1099 for (uint tableno= const_tables; tableno < tables; tableno++)
1100 {
1101 JOIN_TAB *const tab= best_ref[tableno];
1102
1103 if (tab->type() == JT_REF) // Here JT_REF means all kinds of ref access
1104 {
1105 DBUG_ASSERT(tab->position() && tab->position()->key);
1106 if (create_ref_for_key(this, tab, tab->position()->key,
1107 tab->prefix_tables()))
1108 DBUG_RETURN(true);
1109 }
1110 }
1111
1112 DBUG_RETURN(false);
1113 }
1114
1115
1116 /**
1117 Set the first_sj_inner_tab and last_sj_inner_tab fields for all tables
1118 inside the semijoin nests of the query.
1119 */
set_semijoin_info()1120 void JOIN::set_semijoin_info()
1121 {
1122 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
1123 if (select_lex->sj_nests.is_empty())
1124 return;
1125
1126 for (uint tableno= const_tables; tableno < tables; )
1127 {
1128 JOIN_TAB *const tab= best_ref[tableno];
1129 const POSITION *const pos= tab->position();
1130
1131 if (!pos)
1132 {
1133 tableno++;
1134 continue;
1135 }
1136 switch (pos->sj_strategy)
1137 {
1138 case SJ_OPT_NONE:
1139 tableno++;
1140 break;
1141 case SJ_OPT_MATERIALIZE_LOOKUP:
1142 case SJ_OPT_MATERIALIZE_SCAN:
1143 case SJ_OPT_LOOSE_SCAN:
1144 case SJ_OPT_DUPS_WEEDOUT:
1145 case SJ_OPT_FIRST_MATCH:
1146 /*
1147 Remember the first and last semijoin inner tables; this serves to tell
1148 a JOIN_TAB's semijoin strategy (like in setup_join_buffering()).
1149 */
1150 plan_idx last_sj_tab= tableno + pos->n_sj_tables - 1;
1151 plan_idx last_sj_inner=
1152 (pos->sj_strategy == SJ_OPT_DUPS_WEEDOUT) ?
1153 /* Range may end with non-inner table so cannot set last_sj_inner_tab */
1154 NO_PLAN_IDX : last_sj_tab;
1155 for (plan_idx tab_in_range= tableno;
1156 tab_in_range <= last_sj_tab;
1157 tab_in_range++)
1158 {
1159 best_ref[tab_in_range]->set_first_sj_inner(tableno);
1160 best_ref[tab_in_range]->set_last_sj_inner(last_sj_inner);
1161 }
1162 tableno+= pos->n_sj_tables;
1163 break;
1164 }
1165 }
1166 }
1167
1168
calc_length_and_keyparts(Key_use * keyuse,JOIN_TAB * tab,const uint key,table_map used_tables,Key_use ** chosen_keyuses,uint * length_out,uint * keyparts_out,table_map * dep_map,bool * maybe_null)1169 void calc_length_and_keyparts(Key_use *keyuse, JOIN_TAB *tab, const uint key,
1170 table_map used_tables,Key_use **chosen_keyuses,
1171 uint *length_out, uint *keyparts_out,
1172 table_map *dep_map, bool *maybe_null)
1173 {
1174 DBUG_ASSERT(!dep_map || maybe_null);
1175 uint keyparts= 0, length= 0;
1176 uint found_part_ref_or_null= 0;
1177 KEY *const keyinfo= tab->table()->key_info + key;
1178
1179 do
1180 {
1181 /*
1182 This Key_use is chosen if:
1183 - it involves a key part at the right place (if index is (a,b) we
1184 can have a search criterion on 'b' only if we also have a criterion
1185 on 'a'),
1186 - it references only tables earlier in the plan.
1187 Moreover, the execution layer is limited to maximum one ref_or_null
1188 keypart, as TABLE_REF::null_ref_key is only one byte.
1189 */
1190 if (!(~used_tables & keyuse->used_tables) &&
1191 keyparts == keyuse->keypart &&
1192 !(found_part_ref_or_null & keyuse->optimize))
1193 {
1194 DBUG_ASSERT(keyparts <= MAX_REF_PARTS);
1195 if (chosen_keyuses)
1196 chosen_keyuses[keyparts]= keyuse;
1197 keyparts++;
1198 length+= keyinfo->key_part[keyuse->keypart].store_length;
1199 found_part_ref_or_null|= keyuse->optimize;
1200 if (dep_map)
1201 {
1202 *dep_map|= keyuse->val->used_tables();
1203 *maybe_null|= keyinfo->key_part[keyuse->keypart].null_bit &&
1204 (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL);
1205 }
1206 }
1207 keyuse++;
1208 } while (keyuse->table_ref == tab->table_ref && keyuse->key == key);
1209 DBUG_ASSERT(keyparts > 0);
1210 *length_out= length;
1211 *keyparts_out= keyparts;
1212 }
1213
1214
1215 /**
1216 Setup a ref access for looking up rows via an index (a key).
1217
1218 @param join The join object being handled
1219 @param j The join_tab which will have the ref access populated
1220 @param first_keyuse First key part of (possibly multi-part) key
1221 @param used_tables Bitmap of available tables
1222
1223 @return False if success, True if error
1224
1225 Given a Key_use structure that specifies the fields that can be used
1226 for index access, this function creates and set up the structure
1227 used for index look up via one of the access methods {JT_FT,
1228 JT_CONST, JT_REF_OR_NULL, JT_REF, JT_EQ_REF} for the plan operator
1229 'j'. Generally the function sets up the structure j->ref (of type
1230 TABLE_REF), and the access method j->type.
1231
1232 @note We cannot setup fields used for ref access before we have sorted
1233 the items within multiple equalities according to the final order of
1234 the tables involved in the join operation. Currently, this occurs in
1235 @see substitute_for_best_equal_field().
1236 The exception is ref access for const tables, which are fixed
1237 before the greedy search planner is invoked.
1238 */
1239
create_ref_for_key(JOIN * join,JOIN_TAB * j,Key_use * org_keyuse,table_map used_tables)1240 bool create_ref_for_key(JOIN *join, JOIN_TAB *j, Key_use *org_keyuse,
1241 table_map used_tables)
1242 {
1243 DBUG_ENTER("create_ref_for_key");
1244
1245 Key_use *keyuse= org_keyuse;
1246 const uint key= keyuse->key;
1247 const bool ftkey= (keyuse->keypart == FT_KEYPART);
1248 THD *const thd= join->thd;
1249 uint keyparts, length;
1250 TABLE *const table= j->table();
1251 KEY *const keyinfo= table->key_info+key;
1252 Key_use *chosen_keyuses[MAX_REF_PARTS];
1253
1254 DBUG_ASSERT(j->keys().is_set(org_keyuse->key));
1255
1256 /* Calculate the length of the used key. */
1257 if (ftkey)
1258 {
1259 Item_func_match *ifm=(Item_func_match *)keyuse->val;
1260
1261 length=0;
1262 keyparts=1;
1263 ifm->get_master()->join_key= 1;
1264 }
1265 else /* not ftkey */
1266 calc_length_and_keyparts(keyuse, j, key, used_tables, chosen_keyuses,
1267 &length, &keyparts, NULL, NULL);
1268 /* set up fieldref */
1269 j->ref().key_parts=keyparts;
1270 j->ref().key_length=length;
1271 j->ref().key=(int) key;
1272 if (!(j->ref().key_buff= (uchar*) thd->mem_calloc(ALIGN_SIZE(length)*2)) ||
1273 !(j->ref().key_copy= (store_key**) thd->alloc((sizeof(store_key*) *
1274 (keyparts)))) ||
1275 !(j->ref().items= (Item**) thd->alloc(sizeof(Item*)*keyparts)) ||
1276 !(j->ref().cond_guards= (bool**) thd->alloc(sizeof(uint*)*keyparts)))
1277 {
1278 DBUG_RETURN(TRUE);
1279 }
1280 j->ref().key_buff2=j->ref().key_buff+ALIGN_SIZE(length);
1281 j->ref().key_err=1;
1282 j->ref().has_record= FALSE;
1283 j->ref().null_rejecting= 0;
1284 j->ref().use_count= 0;
1285 j->ref().disable_cache= FALSE;
1286 keyuse=org_keyuse;
1287
1288 uchar *key_buff= j->ref().key_buff;
1289 uchar *null_ref_key= NULL;
1290 bool keyuse_uses_no_tables= true;
1291 if (ftkey)
1292 {
1293 j->ref().items[0]=((Item_func*)(keyuse->val))->key_item();
1294 /* Predicates pushed down into subquery can't be used FT access */
1295 j->ref().cond_guards[0]= NULL;
1296 if (keyuse->used_tables)
1297 DBUG_RETURN(TRUE); // not supported yet. SerG
1298
1299 j->set_type(JT_FT);
1300 j->set_ft_func((Item_func_match *)keyuse->val);
1301 memset(j->ref().key_copy, 0, sizeof(j->ref().key_copy[0]) * keyparts);
1302 }
1303 else
1304 {
1305 // Set up TABLE_REF based on chosen Key_use-s.
1306 for (uint part_no= 0 ; part_no < keyparts ; part_no++)
1307 {
1308 keyuse= chosen_keyuses[part_no];
1309 uint maybe_null= MY_TEST(keyinfo->key_part[part_no].null_bit);
1310
1311 if (keyuse->val->type() == Item::FIELD_ITEM)
1312 {
1313 // Look up the most appropriate field to base the ref access on.
1314 keyuse->val= get_best_field(static_cast<Item_field *>(keyuse->val),
1315 join->cond_equal);
1316 keyuse->used_tables= keyuse->val->used_tables();
1317 }
1318 j->ref().items[part_no]=keyuse->val; // Save for cond removal
1319 j->ref().cond_guards[part_no]= keyuse->cond_guard;
1320 if (keyuse->null_rejecting)
1321 j->ref().null_rejecting|= (key_part_map)1 << part_no;
1322 keyuse_uses_no_tables= keyuse_uses_no_tables && !keyuse->used_tables;
1323
1324 store_key* s_key= get_store_key(thd,
1325 keyuse,join->const_table_map,
1326 &keyinfo->key_part[part_no],
1327 key_buff, maybe_null);
1328 if (unlikely(!s_key || thd->is_fatal_error))
1329 DBUG_RETURN(TRUE);
1330
1331 if (keyuse->used_tables)
1332 /* Comparing against a non-constant. */
1333 j->ref().key_copy[part_no]= s_key;
1334 else
1335 {
1336 /**
1337 The outer reference is to a const table, so we copy the value
1338 straight from that table now (during optimization), instead of from
1339 the temporary table created during execution.
1340
1341 TODO: Synchronize with the temporary table creation code, so that
1342 there is no need to create a column for this value.
1343 */
1344 bool dummy_value= false;
1345 keyuse->val->walk(&Item::repoint_const_outer_ref,
1346 Item::WALK_PREFIX,
1347 pointer_cast<uchar*>(&dummy_value));
1348 /*
1349 key is const, copy value now and possibly skip it while ::exec().
1350
1351 Note:
1352 Result check of store_key::copy() is unnecessary,
1353 it could be an error returned by store_key::copy() method
1354 but stored value is not null and default value could be used
1355 in this case. Methods which used for storing the value
1356 should be responsible for proper null value setting
1357 in case of an error. Thus it's enough to check s_key->null_key
1358 value only.
1359 */
1360 (void) s_key->copy();
1361 /*
1362 It should be reevaluated in ::exec() if
1363 constant evaluated to NULL value which we might need to
1364 handle as a special case during JOIN::exec()
1365 (As in : 'Full scan on NULL key')
1366 */
1367 if (s_key->null_key)
1368 j->ref().key_copy[part_no]= s_key; // Reevaluate in JOIN::exec()
1369 else
1370 j->ref().key_copy[part_no]= NULL;
1371 }
1372 /*
1373 Remember if we are going to use REF_OR_NULL
1374 But only if field _really_ can be null i.e. we force JT_REF
1375 instead of JT_REF_OR_NULL in case if field can't be null
1376 */
1377 if ((keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL) && maybe_null)
1378 {
1379 DBUG_ASSERT(null_ref_key == NULL); // or we would overwrite it below
1380 null_ref_key= key_buff;
1381 }
1382 key_buff+=keyinfo->key_part[part_no].store_length;
1383 }
1384 } /* not ftkey */
1385 if (j->type() == JT_FT)
1386 DBUG_RETURN(false);
1387 if (j->type() == JT_CONST)
1388 j->table()->const_table= 1;
1389 else if (((actual_key_flags(keyinfo) &
1390 (HA_NOSAME | HA_NULL_PART_KEY)) != HA_NOSAME) ||
1391 keyparts != actual_key_parts(keyinfo) || null_ref_key)
1392 {
1393 /* Must read with repeat */
1394 j->set_type(null_ref_key ? JT_REF_OR_NULL : JT_REF);
1395 j->ref().null_ref_key= null_ref_key;
1396 }
1397 else if (keyuse_uses_no_tables &&
1398 !(table->file->ha_table_flags() & HA_BLOCK_CONST_TABLE))
1399 {
1400 /*
1401 This happen if we are using a constant expression in the ON part
1402 of an LEFT JOIN.
1403 SELECT * FROM a LEFT JOIN b ON b.key=30
1404 Here we should not mark the table as a 'const' as a field may
1405 have a 'normal' value or a NULL value.
1406 */
1407 j->set_type(JT_CONST);
1408 j->position()->rows_fetched= 1.0;
1409 }
1410 else
1411 {
1412 j->set_type(JT_EQ_REF);
1413 j->position()->rows_fetched= 1.0;
1414 }
1415
1416 DBUG_RETURN(false);
1417 }
1418
1419
1420
1421 static store_key *
get_store_key(THD * thd,Key_use * keyuse,table_map used_tables,KEY_PART_INFO * key_part,uchar * key_buff,uint maybe_null)1422 get_store_key(THD *thd, Key_use *keyuse, table_map used_tables,
1423 KEY_PART_INFO *key_part, uchar *key_buff, uint maybe_null)
1424 {
1425 if (!((~used_tables) & keyuse->used_tables)) // if const item
1426 {
1427 return new store_key_const_item(thd,
1428 key_part->field,
1429 key_buff + maybe_null,
1430 maybe_null ? key_buff : 0,
1431 key_part->length,
1432 keyuse->val);
1433 }
1434
1435 Item_field *field_item= NULL;
1436 if (keyuse->val->type() == Item::FIELD_ITEM)
1437 field_item= static_cast<Item_field*>(keyuse->val->real_item());
1438 else if (keyuse->val->type() == Item::REF_ITEM)
1439 {
1440 Item_ref *item_ref= static_cast<Item_ref*>(keyuse->val);
1441 if (item_ref->ref_type() == Item_ref::OUTER_REF)
1442 {
1443 if ((*item_ref->ref)->type() == Item::FIELD_ITEM)
1444 field_item= static_cast<Item_field*>(item_ref->real_item());
1445 else if ((*(Item_ref**)(item_ref)->ref)->ref_type()
1446 == Item_ref::DIRECT_REF
1447 &&
1448 item_ref->real_item()->type() == Item::FIELD_ITEM)
1449 field_item= static_cast<Item_field*>(item_ref->real_item());
1450 }
1451 }
1452 if (field_item)
1453 return new store_key_field(thd,
1454 key_part->field,
1455 key_buff + maybe_null,
1456 maybe_null ? key_buff : 0,
1457 key_part->length,
1458 field_item->field,
1459 keyuse->val->full_name());
1460
1461 return new store_key_item(thd,
1462 key_part->field,
1463 key_buff + maybe_null,
1464 maybe_null ? key_buff : 0,
1465 key_part->length,
1466 keyuse->val);
1467 }
1468
1469
copy_inner()1470 enum store_key::store_key_result store_key_hash_item::copy_inner()
1471 {
1472 enum store_key_result res= store_key_item::copy_inner();
1473 if (res != STORE_KEY_FATAL)
1474 *hash= unique_hash(to_field, hash);
1475 return res;
1476 }
1477
1478
1479 /**
1480 Extend e1 by AND'ing e2 to the condition e1 points to. The resulting
1481 condition is fixed. Requirement: the input Items must already have
1482 been fixed.
1483
1484 @param[in,out] e1 Pointer to condition that will be extended with e2
1485 @param e2 Condition that will extend e1
1486
1487 @retval true if there was a memory allocation error, in which case
1488 e1 remains unchanged
1489 @retval false otherwise
1490 */
1491
and_conditions(Item ** e1,Item * e2)1492 bool and_conditions(Item **e1, Item *e2)
1493 {
1494 DBUG_ASSERT(!(*e1) || (*e1)->fixed);
1495 DBUG_ASSERT(!e2 || e2->fixed);
1496 if (*e1)
1497 {
1498 if (!e2)
1499 return false;
1500 Item *res= new Item_cond_and(*e1, e2);
1501 if (unlikely(!res))
1502 return true;
1503
1504 *e1= res;
1505 res->quick_fix_field();
1506 res->update_used_tables();
1507
1508 }
1509 else
1510 *e1= e2;
1511 return false;
1512 }
1513
1514
1515 #define ICP_COND_USES_INDEX_ONLY 10
1516
1517 /*
1518 Get a part of the condition that can be checked using only index fields
1519
1520 SYNOPSIS
1521 make_cond_for_index()
1522 cond The source condition
1523 table The table that is partially available
1524 keyno The index in the above table. Only fields covered by the index
1525 are available
1526 other_tbls_ok TRUE <=> Fields of other non-const tables are allowed
1527
1528 DESCRIPTION
1529 Get a part of the condition that can be checked when for the given table
1530 we have values only of fields covered by some index. The condition may
1531 refer to other tables, it is assumed that we have values of all of their
1532 fields.
1533
1534 Example:
1535 make_cond_for_index(
1536 "cond(t1.field) AND cond(t2.key1) AND cond(t2.non_key) AND cond(t2.key2)",
1537 t2, keyno(t2.key1))
1538 will return
1539 "cond(t1.field) AND cond(t2.key2)"
1540
1541 RETURN
1542 Index condition, or NULL if no condition could be inferred.
1543 */
1544
make_cond_for_index(Item * cond,TABLE * table,uint keyno,bool other_tbls_ok)1545 static Item *make_cond_for_index(Item *cond, TABLE *table, uint keyno,
1546 bool other_tbls_ok)
1547 {
1548 DBUG_ASSERT(cond != NULL);
1549
1550 if (cond->type() == Item::COND_ITEM)
1551 {
1552 uint n_marked= 0;
1553 if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
1554 {
1555 table_map used_tables= 0;
1556 Item_cond_and *new_cond=new Item_cond_and;
1557 if (!new_cond)
1558 return NULL;
1559 List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
1560 Item *item;
1561 while ((item=li++))
1562 {
1563 Item *fix= make_cond_for_index(item, table, keyno, other_tbls_ok);
1564 if (fix)
1565 {
1566 new_cond->argument_list()->push_back(fix);
1567 used_tables|= fix->used_tables();
1568 }
1569 n_marked += MY_TEST(item->marker == ICP_COND_USES_INDEX_ONLY);
1570 }
1571 if (n_marked ==((Item_cond*)cond)->argument_list()->elements)
1572 cond->marker= ICP_COND_USES_INDEX_ONLY;
1573 switch (new_cond->argument_list()->elements) {
1574 case 0:
1575 return NULL;
1576 case 1:
1577 new_cond->set_used_tables(used_tables);
1578 return new_cond->argument_list()->head();
1579 default:
1580 new_cond->quick_fix_field();
1581 new_cond->set_used_tables(used_tables);
1582 return new_cond;
1583 }
1584 }
1585 else /* It's OR */
1586 {
1587 Item_cond_or *new_cond=new Item_cond_or;
1588 if (!new_cond)
1589 return NULL;
1590 List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
1591 Item *item;
1592 while ((item=li++))
1593 {
1594 Item *fix= make_cond_for_index(item, table, keyno, other_tbls_ok);
1595 if (!fix)
1596 return NULL;
1597 new_cond->argument_list()->push_back(fix);
1598 n_marked += MY_TEST(item->marker == ICP_COND_USES_INDEX_ONLY);
1599 }
1600 if (n_marked ==((Item_cond*)cond)->argument_list()->elements)
1601 cond->marker= ICP_COND_USES_INDEX_ONLY;
1602 new_cond->quick_fix_field();
1603 new_cond->set_used_tables(cond->used_tables());
1604 new_cond->top_level_item();
1605 return new_cond;
1606 }
1607 }
1608
1609 if (!uses_index_fields_only(cond, table, keyno, other_tbls_ok))
1610 {
1611 /*
1612 Reset marker since it might have the value
1613 ICP_COND_USES_INDEX_ONLY if this condition is part of the select
1614 condition for multiple tables.
1615 */
1616 cond->marker= 0;
1617 return NULL;
1618 }
1619 cond->marker= ICP_COND_USES_INDEX_ONLY;
1620 return cond;
1621 }
1622
1623
make_cond_remainder(Item * cond,bool exclude_index)1624 static Item *make_cond_remainder(Item *cond, bool exclude_index)
1625 {
1626 if (exclude_index && cond->marker == ICP_COND_USES_INDEX_ONLY)
1627 return 0; /* Already checked */
1628
1629 if (cond->type() == Item::COND_ITEM)
1630 {
1631 table_map tbl_map= 0;
1632 if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
1633 {
1634 /* Create new top level AND item */
1635 Item_cond_and *new_cond=new Item_cond_and;
1636 if (!new_cond)
1637 return (Item*) 0;
1638 List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
1639 Item *item;
1640 while ((item=li++))
1641 {
1642 Item *fix= make_cond_remainder(item, exclude_index);
1643 if (fix)
1644 {
1645 new_cond->argument_list()->push_back(fix);
1646 tbl_map |= fix->used_tables();
1647 }
1648 }
1649 switch (new_cond->argument_list()->elements) {
1650 case 0:
1651 return (Item*) 0;
1652 case 1:
1653 return new_cond->argument_list()->head();
1654 default:
1655 new_cond->quick_fix_field();
1656 new_cond->set_used_tables(tbl_map);
1657 return new_cond;
1658 }
1659 }
1660 else /* It's OR */
1661 {
1662 Item_cond_or *new_cond=new Item_cond_or;
1663 if (!new_cond)
1664 return (Item*) 0;
1665 List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
1666 Item *item;
1667 while ((item=li++))
1668 {
1669 Item *fix= make_cond_remainder(item, FALSE);
1670 if (!fix)
1671 return (Item*) 0;
1672 new_cond->argument_list()->push_back(fix);
1673 tbl_map |= fix->used_tables();
1674 }
1675 new_cond->quick_fix_field();
1676 new_cond->set_used_tables(tbl_map);
1677 new_cond->top_level_item();
1678 return new_cond;
1679 }
1680 }
1681 return cond;
1682 }
1683
1684
1685 /**
1686 Try to extract and push the index condition down to table handler
1687
1688 @param join_tab join_tab for table
1689 @param keyno Index for which extract and push the condition
1690 @param trace_obj trace object where information is to be added
1691 */
push_index_cond(const JOIN_TAB * join_tab,uint keyno,Opt_trace_object * trace_obj)1692 void QEP_TAB::push_index_cond(const JOIN_TAB *join_tab,
1693 uint keyno,
1694 Opt_trace_object *trace_obj)
1695 {
1696 JOIN *const join_= join();
1697 DBUG_ENTER("push_index_cond");
1698
1699 ASSERT_BEST_REF_IN_JOIN_ORDER(join_);
1700 DBUG_ASSERT(join_tab == join_->best_ref[idx()]);
1701
1702 if (join_tab->reversed_access) // @todo: historical limitation, lift it!
1703 DBUG_VOID_RETURN;
1704
1705 TABLE *const tbl= table();
1706
1707 // Disable ICP for Innodb intrinsic temp table because of performance
1708 if (internal_tmp_disk_storage_engine == TMP_TABLE_INNODB &&
1709 tbl->s->db_type() == innodb_hton &&
1710 tbl->s->tmp_table != NO_TMP_TABLE &&
1711 tbl->s->tmp_table != TRANSACTIONAL_TMP_TABLE)
1712 DBUG_VOID_RETURN;
1713
1714 // TODO: Currently, index on virtual generated column doesn't support ICP
1715 if (tbl->vfield && tbl->index_contains_some_virtual_gcol(keyno))
1716 DBUG_VOID_RETURN;
1717
1718 /*
1719 Fields of other non-const tables aren't allowed in following cases:
1720 type is:
1721 (JT_ALL | JT_INDEX_SCAN | JT_RANGE | JT_INDEX_MERGE)
1722 and BNL is used.
1723 and allowed otherwise.
1724 */
1725 const bool other_tbls_ok=
1726 !((type() == JT_ALL || type() == JT_INDEX_SCAN ||
1727 type() == JT_RANGE || type() == JT_INDEX_MERGE) &&
1728 join_tab->use_join_cache() == JOIN_CACHE::ALG_BNL);
1729
1730 /*
1731 We will only attempt to push down an index condition when the
1732 following criteria are true:
1733 0. The table has a select condition
1734 1. The storage engine supports ICP.
1735 2. The index_condition_pushdown switch is on and
1736 the use of ICP is not disabled by the NO_ICP hint.
1737 3. The query is not a multi-table update or delete statement. The reason
1738 for this requirement is that the same handler will be used
1739 both for doing the select/join and the update. The pushed index
1740 condition might then also be applied by the storage engine
1741 when doing the update part and result in either not finding
1742 the record to update or updating the wrong record.
1743 4. The JOIN_TAB is not part of a subquery that has guarded conditions
1744 that can be turned on or off during execution of a 'Full scan on NULL
1745 key'.
1746 @see Item_in_optimizer::val_int()
1747 @see subselect_single_select_engine::exec()
1748 @see TABLE_REF::cond_guards
1749 @see setup_join_buffering
1750 5. The join type is not CONST or SYSTEM. The reason for excluding
1751 these join types, is that these are optimized to only read the
1752 record once from the storage engine and later re-use it. In a
1753 join where a pushed index condition evaluates fields from
1754 tables earlier in the join sequence, the pushed condition would
1755 only be evaluated the first time the record value was needed.
1756 6. The index is not a clustered index. The performance improvement
1757 of pushing an index condition on a clustered key is much lower
1758 than on a non-clustered key. This restriction should be
1759 re-evaluated when WL#6061 is implemented.
1760 7. The index on virtual generated columns is not supported for ICP.
1761 */
1762 if (condition() &&
1763 tbl->file->index_flags(keyno, 0, 1) &
1764 HA_DO_INDEX_COND_PUSHDOWN &&
1765 hint_key_state(join_->thd, tbl, keyno, ICP_HINT_ENUM,
1766 OPTIMIZER_SWITCH_INDEX_CONDITION_PUSHDOWN) &&
1767 join_->thd->lex->sql_command != SQLCOM_UPDATE_MULTI &&
1768 join_->thd->lex->sql_command != SQLCOM_DELETE_MULTI &&
1769 !has_guarded_conds() &&
1770 type() != JT_CONST && type() != JT_SYSTEM &&
1771 !(keyno == tbl->s->primary_key &&
1772 tbl->file->primary_key_is_clustered()))
1773 {
1774 DBUG_EXECUTE("where", print_where(condition(), "full cond",
1775 QT_ORDINARY););
1776 Item *idx_cond= make_cond_for_index(condition(), tbl,
1777 keyno, other_tbls_ok);
1778 DBUG_EXECUTE("where", print_where(idx_cond, "idx cond", QT_ORDINARY););
1779 if (idx_cond)
1780 {
1781 /*
1782 Check that the condition to push actually contains fields from
1783 the index. Without any fields from the index it is unlikely
1784 that it will filter out any records since the conditions on
1785 fields from other tables in most cases have already been
1786 evaluated.
1787 */
1788 idx_cond->update_used_tables();
1789 if ((idx_cond->used_tables() & table_ref->map()) == 0)
1790 {
1791 /*
1792 The following assert is to check that we only skip pushing the
1793 index condition for the following situations:
1794 1. We actually are allowed to generate an index condition on another
1795 table.
1796 2. The index condition is a constant item.
1797 3. The index condition contains an updatable user variable
1798 (test this by checking that the RAND_TABLE_BIT is set).
1799 */
1800 DBUG_ASSERT(other_tbls_ok || // 1
1801 idx_cond->const_item() || // 2
1802 (idx_cond->used_tables() & RAND_TABLE_BIT) ); // 3
1803 DBUG_VOID_RETURN;
1804 }
1805
1806 Item *idx_remainder_cond= 0;
1807
1808 /*
1809 For BKA cache we store condition to special BKA cache field
1810 because evaluation of the condition requires additional operations
1811 before the evaluation. This condition is used in
1812 JOIN_CACHE_BKA[_UNIQUE]::skip_index_tuple() functions.
1813 */
1814 if (join_tab->use_join_cache() &&
1815 /*
1816 if cache is used then the value is TRUE only
1817 for BKA[_UNIQUE] cache (see setup_join_buffering() func).
1818 In this case other_tbls_ok is an equivalent of
1819 cache->is_key_access().
1820 */
1821 other_tbls_ok &&
1822 (idx_cond->used_tables() &
1823 ~(table_ref->map() | join_->const_table_map)))
1824 {
1825 cache_idx_cond= idx_cond;
1826 trace_obj->add("pushed_to_BKA", true);
1827 }
1828 else
1829 {
1830 idx_remainder_cond= tbl->file->idx_cond_push(keyno, idx_cond);
1831 DBUG_EXECUTE("where",
1832 print_where(tbl->file->pushed_idx_cond, "icp cond",
1833 QT_ORDINARY););
1834 }
1835 /*
1836 Disable eq_ref's "lookup cache" if we've pushed down an index
1837 condition.
1838 TODO: This check happens to work on current ICP implementations, but
1839 there may exist a compliant implementation that will not work
1840 correctly with it. Sort this out when we stabilize the condition
1841 pushdown APIs.
1842 */
1843 if (idx_remainder_cond != idx_cond)
1844 {
1845 ref().disable_cache= TRUE;
1846 trace_obj->add("pushed_index_condition", idx_cond);
1847 }
1848
1849 Item *row_cond= make_cond_remainder(condition(), TRUE);
1850 DBUG_EXECUTE("where", print_where(row_cond, "remainder cond",
1851 QT_ORDINARY););
1852
1853 if (row_cond)
1854 {
1855 if (idx_remainder_cond)
1856 and_conditions(&row_cond, idx_remainder_cond);
1857 idx_remainder_cond= row_cond;
1858 }
1859 set_condition(idx_remainder_cond);
1860 trace_obj->add("table_condition_attached", idx_remainder_cond);
1861 }
1862 }
1863 DBUG_VOID_RETURN;
1864 }
1865
1866
1867 /**
1868 Setup the materialized table for a semi-join nest
1869
1870 @param tab join_tab for the materialized semi-join table
1871 @param tableno table number of materialized table
1872 @param inner_pos information about the first inner table of the subquery
1873 @param sjm_pos information about the materialized semi-join table,
1874 to be filled with data.
1875
1876 @details
1877 Setup execution structures for one semi-join materialization nest:
1878 - Create the materialization temporary table, including TABLE_LIST object.
1879 - Create a list of Item_field objects per column in the temporary table.
1880 - Create a keyuse array describing index lookups into the table
1881 (for MaterializeLookup)
1882
1883 @return False if OK, True if error
1884 */
1885
setup_semijoin_materialized_table(JOIN_TAB * tab,uint tableno,const POSITION * inner_pos,POSITION * sjm_pos)1886 bool JOIN::setup_semijoin_materialized_table(JOIN_TAB *tab, uint tableno,
1887 const POSITION *inner_pos,
1888 POSITION *sjm_pos)
1889 {
1890 DBUG_ENTER("JOIN::setup_semijoin_materialized_table");
1891 const TABLE_LIST *const emb_sj_nest= inner_pos->table->emb_sj_nest;
1892 Semijoin_mat_optimize *const sjm_opt= &emb_sj_nest->nested_join->sjm;
1893 Semijoin_mat_exec *const sjm_exec= tab->sj_mat_exec();
1894 const uint field_count= emb_sj_nest->nested_join->sj_inner_exprs.elements;
1895
1896 DBUG_ASSERT(inner_pos->sj_strategy == SJ_OPT_MATERIALIZE_LOOKUP ||
1897 inner_pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN);
1898
1899 /*
1900 Set up the table to write to, do as
1901 Query_result_union::create_result_table does
1902 */
1903 sjm_exec->table_param= Temp_table_param();
1904 count_field_types(select_lex, &sjm_exec->table_param,
1905 emb_sj_nest->nested_join->sj_inner_exprs, false, true);
1906 sjm_exec->table_param.bit_fields_as_long= true;
1907
1908 char buffer[NAME_LEN];
1909 const size_t len= my_snprintf(buffer, sizeof(buffer) - 1, "<subquery%u>",
1910 emb_sj_nest->nested_join->query_block_id);
1911 char *name= (char *)alloc_root(thd->mem_root, len + 1);
1912 if (name == NULL)
1913 DBUG_RETURN(true); /* purecov: inspected */
1914
1915 memcpy(name, buffer, len);
1916 name[len] = '\0';
1917 TABLE *table;
1918 if (!(table= create_tmp_table(thd, &sjm_exec->table_param,
1919 emb_sj_nest->nested_join->sj_inner_exprs,
1920 NULL,
1921 true /* distinct */,
1922 true /* save_sum_fields */,
1923 thd->variables.option_bits |
1924 TMP_TABLE_ALL_COLUMNS,
1925 HA_POS_ERROR /* rows_limit */,
1926 name)))
1927 DBUG_RETURN(true); /* purecov: inspected */
1928 sjm_exec->table= table;
1929 map2table[tableno]= tab;
1930 table->file->extra(HA_EXTRA_WRITE_CACHE);
1931 table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
1932 sj_tmp_tables.push_back(table);
1933 sjm_exec_list.push_back(sjm_exec);
1934
1935 /*
1936 Hash_field is not applicable for MATERIALIZE_LOOKUP. If hash_field is
1937 created for temporary table, semijoin_types_allow_materialization must
1938 assure that MATERIALIZE_LOOKUP can't be chosen.
1939 */
1940 DBUG_ASSERT((inner_pos->sj_strategy == SJ_OPT_MATERIALIZE_LOOKUP &&
1941 !table->hash_field) ||
1942 inner_pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN);
1943
1944 TABLE_LIST *tl;
1945 if (!(tl= (TABLE_LIST *) alloc_root(thd->mem_root, sizeof(TABLE_LIST))))
1946 DBUG_RETURN(true); /* purecov: inspected */
1947 // TODO: May have to setup outer-join info for this TABLE_LIST !!!
1948
1949 tl->init_one_table("", 0, name, strlen(name), name, TL_IGNORE);
1950
1951 tl->table= table;
1952 tl->set_tableno(tableno);
1953
1954 table->pos_in_table_list= tl;
1955
1956 if (!(sjm_opt->mat_fields=
1957 (Item_field **) alloc_root(thd->mem_root,
1958 field_count * sizeof(Item_field **))))
1959 DBUG_RETURN(true);
1960
1961 for (uint fieldno= 0; fieldno < field_count; fieldno++)
1962 {
1963 if (!(sjm_opt->mat_fields[fieldno]=
1964 new Item_field(table->visible_field_ptr()[fieldno])))
1965 DBUG_RETURN(true);
1966 }
1967
1968 tab->table_ref= tl;
1969 tab->set_table(table);
1970 tab->set_position(sjm_pos);
1971
1972 tab->worst_seeks= 1.0;
1973 tab->set_records((ha_rows)emb_sj_nest->nested_join->sjm.expected_rowcount);
1974
1975 tab->found_records= tab->records();
1976 tab->read_time= (ha_rows)emb_sj_nest->nested_join->sjm.scan_cost.total_cost();
1977
1978 tab->init_join_cond_ref(tl);
1979
1980 table->keys_in_use_for_query.set_all();
1981 sjm_pos->table= tab;
1982 sjm_pos->sj_strategy= SJ_OPT_NONE;
1983
1984 sjm_pos->use_join_buffer= false;
1985 /*
1986 No need to recalculate filter_effect since there are no post-read
1987 conditions for materialized tables.
1988 */
1989 sjm_pos->filter_effect= 1.0;
1990
1991 /*
1992 Key_use objects are required so that create_ref_for_key() can set up
1993 a proper ref access for this table.
1994 */
1995 Key_use_array *keyuse=
1996 create_keyuse_for_table(thd, table, field_count, sjm_opt->mat_fields,
1997 emb_sj_nest->nested_join->sj_outer_exprs);
1998 if (!keyuse)
1999 DBUG_RETURN(true);
2000
2001 double fanout= ((uint)tab->idx() == const_tables) ?
2002 1.0 : best_ref[tab->idx() - 1]->position()->prefix_rowcount;
2003 if (!sjm_exec->is_scan)
2004 {
2005 sjm_pos->key= keyuse->begin(); // MaterializeLookup will use the index
2006 sjm_pos->read_cost= emb_sj_nest->nested_join->sjm.lookup_cost.total_cost() *
2007 fanout;
2008 tab->set_keyuse(keyuse->begin());
2009 tab->keys().set_bit(0); // There is one index - use it always
2010 tab->set_index(0);
2011 sjm_pos->rows_fetched= 1.0;
2012 tab->set_type(JT_REF);
2013 }
2014 else
2015 {
2016 sjm_pos->key= NULL; // No index use for MaterializeScan
2017 sjm_pos->read_cost= tab->read_time * fanout;
2018 sjm_pos->rows_fetched= static_cast<double>(tab->records());
2019 tab->set_type(JT_ALL);
2020 }
2021 sjm_pos->set_prefix_join_cost((tab - join_tab), cost_model());
2022
2023 DBUG_RETURN(false);
2024 }
2025
2026
2027 /**
2028 A helper function that allocates appropriate join cache object and
2029 sets next_select function of previous tab.
2030 */
2031
void QEP_TAB::init_join_cache(JOIN_TAB *join_tab)
{
  JOIN *const join_= join();
  // A join buffer is never used for the first (leftmost) table.
  DBUG_ASSERT(idx() > 0);
  ASSERT_BEST_REF_IN_JOIN_ORDER(join_);
  DBUG_ASSERT(join_tab == join_->best_ref[idx()]);

  JOIN_CACHE *prev_cache= NULL;
  if ((uint)idx() > join_->const_tables)
  {
    QEP_TAB *prev_tab= this - 1;
    /*
      Link with the previous join cache, but make sure that we do not link
      join caches of two different operations when the previous operation was
      MaterializeLookup or MaterializeScan, ie if:
      1. the previous join_tab has join buffering enabled, and
      2. the previous join_tab belongs to a materialized semi-join nest, and
      3. this join_tab represents a regular table, or is part of a different
         semi-join interval than the previous join_tab.
    */
    prev_cache= (JOIN_CACHE*)prev_tab->op;
    if (prev_cache != NULL &&                                       // 1
        sj_is_materialize_strategy(prev_tab->get_sj_strategy()) &&  // 2
        first_sj_inner() != prev_tab->first_sj_inner())             // 3
      prev_cache= NULL;
  }
  // Allocate the cache object matching the algorithm chosen by the optimizer.
  switch (join_tab->use_join_cache())
  {
  case JOIN_CACHE::ALG_BNL:
    op= new JOIN_CACHE_BNL(join_, this, prev_cache);
    break;
  case JOIN_CACHE::ALG_BKA:
    op= new JOIN_CACHE_BKA(join_, this, join_tab->join_cache_flags, prev_cache);
    break;
  case JOIN_CACHE::ALG_BKA_UNIQUE:
    op= new JOIN_CACHE_BKA_UNIQUE(join_, this, join_tab->join_cache_flags, prev_cache);
    break;
  default:
    DBUG_ASSERT(0);

  }
  // Debug hook: force the allocation below to fail when a linked cache exists.
  DBUG_EXECUTE_IF("jb_alloc_with_prev_fail",
                  if (prev_cache)
                  {
                    DBUG_SET("+d,jb_alloc_fail");
                    DBUG_SET("-d,jb_alloc_with_prev_fail");
                  });
  if (!op || op->init())
  {
    /*
      OOM. If it's in creation of "op" it has thrown error.
      If it's in init() (allocation of the join buffer) it has not,
      and there's a chance to execute the query:
      we remove this join buffer, and all others (as there may be
      dependencies due to outer joins).
      @todo Consider sending a notification of this problem (a warning to the
      client, or to the error log).
    */
    for (uint i= join_->const_tables; i < join_->tables; i++)
    {
      QEP_TAB *const q= &join_->qep_tab[i];
      if (!q->position())
        continue;
      JOIN_TAB *const t= join_->best_ref[i];
      if (t->use_join_cache() == JOIN_CACHE::ALG_NONE)
        continue;
      t->set_use_join_cache(JOIN_CACHE::ALG_NONE);
      /*
        Detach the join buffer from QEP_TAB so that EXPLAIN doesn't show
        'Using join buffer'. Destroy the join buffer.
      */
      if (q->op)
      {
        q->op->mem_free();
        delete q->op;
        q->op= NULL;
      }
      DBUG_ASSERT(i > 0);
      /*
        Make the immediately preceding QEP_TAB channel the work to the
        non-buffered nested loop algorithm:
      */
      q[-1].next_select= sub_select;
    }
  }
  else
    this[-1].next_select= sub_select_op;
}
2120
2121
2122 /**
2123 Plan refinement stage: do various setup things for the executor
2124
2125 @param join Join being processed
2126 @param no_jbuf_after Don't use join buffering after table with this number.
2127
2128 @return false if successful, true if error (Out of memory)
2129
2130 @details
2131 Plan refinement stage: do various set ups for the executioner
2132 - setup join buffering use
2133 - push index conditions
2134 - increment relevant counters
2135 - etc
2136 */
2137
bool
make_join_readinfo(JOIN *join, uint no_jbuf_after)
{
  // Update status counters only for real execution, not for EXPLAIN.
  const bool statistics= !join->thd->lex->is_explain();
  // True when rows may be re-read by position later (tmp table, DISTINCT,
  // GROUP BY or ORDER BY processing), so row positions must be obtainable.
  const bool prep_for_pos= join->need_tmp || join->select_distinct ||
                           join->group_list || join->order;

  DBUG_ENTER("make_join_readinfo");
  ASSERT_BEST_REF_IN_JOIN_ORDER(join);

  Opt_trace_context * const trace= &join->thd->opt_trace;
  Opt_trace_object wrapper(trace);
  Opt_trace_array trace_refine_plan(trace, "refine_plan");

  if (setup_semijoin_dups_elimination(join, no_jbuf_after))
    DBUG_RETURN(TRUE); /* purecov: inspected */


  // Set up execution structures for every non-const table, in join order.
  for (uint i= join->const_tables; i < join->tables; i++)
  {
    QEP_TAB *const qep_tab= &join->qep_tab[i];

    if (!qep_tab->position())
      continue;

    JOIN_TAB *const tab= join->best_ref[i];
    TABLE *const table= qep_tab->table();
    /*
      Need to tell handlers that to play it safe, it should fetch all
      columns of the primary key of the tables: this is because MySQL may
      build row pointers for the rows, and for all columns of the primary key
      the read set has not necessarily been set by the server code.
    */
    if (prep_for_pos)
      table->prepare_for_position();

    // Reset per-table executor state to a clean default.
    qep_tab->read_record.table= table;
    qep_tab->next_select=sub_select; /* normal select */
    qep_tab->cache_idx_cond= NULL;
    table->status= STATUS_GARBAGE | STATUS_NOT_FOUND;
    DBUG_ASSERT(!qep_tab->read_first_record);
    qep_tab->read_record.read_record= NULL;
    qep_tab->read_record.unlock_row= rr_unlock_row;

    Opt_trace_object trace_refine_table(trace);
    trace_refine_table.add_utf8_table(qep_tab->table_ref);

    if (qep_tab->do_loosescan())
    {
      // Buffer for remembering the previously seen key during LooseScan.
      if (!(qep_tab->loosescan_buf= (uchar*)join->thd->alloc(qep_tab->loosescan_key_len)))
        DBUG_RETURN(TRUE); /* purecov: inspected */
    }

    if (tab->use_join_cache() != JOIN_CACHE::ALG_NONE)
      qep_tab->init_join_cache(tab);

    switch (qep_tab->type()) {
    case JT_EQ_REF:
    case JT_REF_OR_NULL:
    case JT_REF:
    case JT_SYSTEM:
    case JT_CONST:
      // Prefer index-only read when the used key covers all needed columns;
      // otherwise try to push the index condition to the engine (ICP).
      if (table->covering_keys.is_set(qep_tab->ref().key) &&
          !table->no_keyread)
        table->set_keyread(TRUE);
      else
        qep_tab->push_index_cond(tab, qep_tab->ref().key, &trace_refine_table);
      break;
    case JT_ALL:
      join->thd->set_status_no_index_used();
      /* Fall through */
    case JT_INDEX_SCAN:
      if (tab->position()->filter_effect != COND_FILTER_STALE_NO_CONST &&
          !tab->sj_mat_exec())
      {
        /*
          rows_w_const_cond is # of rows which will be read by the access
          method, minus those which will not pass the constant condition;
          that's how calculate_scan_cost() works. Such number is useful inside
          the planner, but obscure to the reader of EXPLAIN; so we put the
          real count of read rows into rows_fetched, and move the constant
          condition's filter to filter_effect.
        */
        double rows_w_const_cond= qep_tab->position()->rows_fetched;
        table->pos_in_table_list->fetch_number_of_rows();
        tab->position()->rows_fetched=
          static_cast<double>(table->file->stats.records);
        if (tab->position()->filter_effect != COND_FILTER_STALE)
        {
          // Constant condition moves to filter_effect:
          if (tab->position()->rows_fetched == 0) // avoid division by zero
            tab->position()->filter_effect= 0.0f;
          else
            tab->position()->filter_effect*=
              static_cast<float>(rows_w_const_cond/tab->position()->rows_fetched);
        }
      }
      if (tab->use_quick == QS_DYNAMIC_RANGE)
      {
        join->thd->set_status_no_good_index_used();
        if (statistics)
          join->thd->inc_status_select_range_check();
      }
      else
      {
        if (statistics)
        {
          if (i == join->const_tables)
            join->thd->inc_status_select_scan();
          else
            join->thd->inc_status_select_full_join();
        }
      }
      break;
    case JT_RANGE:
    case JT_INDEX_MERGE:
      if (statistics)
      {
        if (i == join->const_tables)
          join->thd->inc_status_select_range();
        else
          join->thd->inc_status_select_full_range_join();
      }
      if (!table->no_keyread && qep_tab->type() == JT_RANGE)
      {
        if (table->covering_keys.is_set(qep_tab->quick()->index))
        {
          DBUG_ASSERT(qep_tab->quick()->index != MAX_KEY);
          table->set_keyread(TRUE);
        }
        if (!table->key_read)
          qep_tab->push_index_cond(tab, qep_tab->quick()->index,
                                   &trace_refine_table);
      }
      // Same rows_fetched/filter_effect normalization as for JT_ALL above,
      // but based on the row estimate of the range/index-merge quick select.
      if (tab->position()->filter_effect != COND_FILTER_STALE_NO_CONST)
      {
        double rows_w_const_cond= qep_tab->position()->rows_fetched;
        qep_tab->position()->rows_fetched= rows2double(tab->quick()->records);
        if (tab->position()->filter_effect != COND_FILTER_STALE)
        {
          // Constant condition moves to filter_effect:
          if (tab->position()->rows_fetched == 0) // avoid division by zero
            tab->position()->filter_effect= 0.0f;
          else
            tab->position()->filter_effect*=
              static_cast<float>(rows_w_const_cond/tab->position()->rows_fetched);
        }
      }
      break;
    case JT_FT:
      if (tab->join()->fts_index_access(tab))
      {
        // All data needed is in the full-text index: enable index-only read.
        table->set_keyread(true);
        table->covering_keys.set_bit(tab->ft_func()->key);
      }
      break;
    default:
      DBUG_PRINT("error",("Table type %d found",qep_tab->type())); /* purecov: deadcode */
      DBUG_ASSERT(0);
      break; /* purecov: deadcode */
    }

    if (tab->position()->filter_effect <= COND_FILTER_STALE)
    {
      /*
        Give a proper value for EXPLAIN.
        For performance reasons, we do not recalculate the filter for
        non-EXPLAIN queries; thus, EXPLAIN CONNECTION may show 100%
        for a query.
      */
      tab->position()->filter_effect=
        join->thd->lex->describe ?
        calculate_condition_filter(tab,
                                   (tab->ref().key != -1) ? tab->position()->key : NULL,
                                   tab->prefix_tables() & ~tab->table_ref->map(),
                                   tab->position()->rows_fetched,
                                   false) : COND_FILTER_ALLPASS;
    }

    qep_tab->pick_table_access_method(tab);

    // Materialize derived tables prior to accessing them.
    if (tab->table_ref->uses_materialization())
      qep_tab->materialize_table= join_materialize_derived;

    if (qep_tab->sj_mat_exec())
      qep_tab->materialize_table= join_materialize_semijoin;
  }

  DBUG_RETURN(FALSE);
}
2329
2330
2331 /**
  Give an error if some tables are joined using a full join.
2333
2334 This is used by multi_table_update and multi_table_delete when running
2335 in safe mode.
2336
  @param join  The join to inspect
2338
2339 @retval
2340 0 ok
2341 @retval
2342 1 Error (full join used)
2343 */
2344
error_if_full_join(JOIN * join)2345 bool error_if_full_join(JOIN *join)
2346 {
2347 ASSERT_BEST_REF_IN_JOIN_ORDER(join);
2348 for (uint i= 0; i < join->primary_tables; i++)
2349 {
2350 JOIN_TAB *const tab= join->best_ref[i];
2351 THD *thd = join->thd;
2352
2353 /*
2354 Safe update error isn't returned if:
2355 1) It is an EXPLAIN statement OR
2356 2) Table is not the target.
2357
2358 Append the first warning (if any) to the error message. Allows the user
2359 to understand why index access couldn't be chosen.
2360 */
2361
2362 if (!thd->lex->is_explain() && tab->table()->pos_in_table_list->updating &&
2363 tab->type() == JT_ALL)
2364 {
2365 my_error(ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE, MYF(0),
2366 thd->get_stmt_da()->get_first_condition_message());
2367 return true;
2368 }
2369 }
2370 return false;
2371 }
2372
2373
2374 /**
2375 Cleanup table of join operation.
2376 */
2377
cleanup()2378 void JOIN_TAB::cleanup()
2379 {
2380 // Delete parts specific of JOIN_TAB:
2381
2382 if (table())
2383 table()->reginfo.join_tab= NULL;
2384
2385 // Delete shared parts:
2386 if (join()->qep_tab)
2387 {
2388 // deletion will be done by QEP_TAB
2389 }
2390 else
2391 qs_cleanup();
2392
2393 TRASH(this, sizeof(*this));
2394 }
2395
2396
void QEP_TAB::cleanup()
{
  // Delete parts specific of QEP_TAB:
  delete filesort;
  filesort= NULL;
  end_read_record(&read_record);
  // quick_optim() may be a different object than quick(); the latter is
  // deleted by qs_cleanup() below, so delete the former only if distinct.
  if (quick_optim() != quick())
    delete quick_optim();

  TABLE *const t= table();

  if (t)
    t->reginfo.qep_tab= NULL;

  // Delete shared parts:
  qs_cleanup();

  // Order of qs_cleanup() and this, matters:
  if (op)
  {
    if (op->type() == QEP_operation::OT_TMP_TABLE)
    {
      if (t) // Check tmp table is not yet freed.
        free_tmp_table(current_thd, t);
      delete tmp_table_param;
      tmp_table_param= NULL;
    }
    op->mem_free();
  }

  TRASH(this, sizeof(*this));
}
2429
2430
qs_cleanup()2431 void QEP_shared_owner::qs_cleanup()
2432 {
2433 /* Skip non-existing derived tables/views result tables */
2434 if (table() &&
2435 (table()->s->tmp_table != INTERNAL_TMP_TABLE || table()->is_created()))
2436 {
2437 table()->set_keyread(FALSE);
2438 table()->file->ha_index_or_rnd_end();
2439 free_io_cache(table());
2440 filesort_free_buffers(table(), true);
2441 TABLE_LIST *const table_ref= table()->pos_in_table_list;
2442 if (table_ref)
2443 {
2444 table_ref->derived_keys_ready= false;
2445 table_ref->derived_key_list.empty();
2446 }
2447 }
2448 delete quick();
2449 TRASH(this, sizeof(*this));
2450 }
2451
2452
sjm_query_block_id() const2453 uint QEP_TAB::sjm_query_block_id() const
2454 {
2455 DBUG_ASSERT(sj_is_materialize_strategy(get_sj_strategy()));
2456 for (uint i= 0; i < join()->primary_tables; ++i)
2457 {
2458 // Find the sj-mat tmp table whose sj nest contains us:
2459 Semijoin_mat_exec *const sjm= join()->qep_tab[i].sj_mat_exec();
2460 if (sjm &&
2461 (uint)idx() >= sjm->inner_table_index &&
2462 (uint)idx() < sjm->inner_table_index + sjm->table_count)
2463 return sjm->sj_nest->nested_join->query_block_id;
2464 }
2465 DBUG_ASSERT(false);
2466 return 0;
2467 }
2468
2469
2470 /**
2471 Extend join_tab->cond by AND'ing add_cond to it
2472
2473 @param add_cond The condition to AND with the existing cond
2474 for this JOIN_TAB
2475
2476 @retval true if there was a memory allocation error
2477 @retval false otherwise
2478 */
and_with_condition(Item * add_cond)2479 bool QEP_shared_owner::and_with_condition(Item *add_cond)
2480 {
2481 Item *tmp= condition();
2482 if (and_conditions(&tmp, add_cond))
2483 return true;
2484 set_condition(tmp);
2485 return false;
2486 }
2487
2488
2489 /**
2490 Partially cleanup JOIN after it has executed: close index or rnd read
2491 (table cursors), free quick selects.
2492
2493 This function is called in the end of execution of a JOIN, before the used
2494 tables are unlocked and closed.
2495
2496 For a join that is resolved using a temporary table, the first sweep is
2497 performed against actual tables and an intermediate result is inserted
  into the temporary table.
2499 The last sweep is performed against the temporary table. Therefore,
2500 the base tables and associated buffers used to fill the temporary table
2501 are no longer needed, and this function is called to free them.
2502
2503 For a join that is performed without a temporary table, this function
2504 is called after all rows are sent, but before EOF packet is sent.
2505
2506 For a simple SELECT with no subqueries this function performs a full
2507 cleanup of the JOIN and calls mysql_unlock_read_tables to free used base
2508 tables.
2509
2510 If a JOIN is executed for a subquery or if it has a subquery, we can't
2511 do the full cleanup and need to do a partial cleanup only.
2512 - If a JOIN is not the top level join, we must not unlock the tables
2513 because the outer select may not have been evaluated yet, and we
2514 can't unlock only selected tables of a query.
2515 - Additionally, if this JOIN corresponds to a correlated subquery, we
2516 should not free quick selects and join buffers because they will be
2517 needed for the next execution of the correlated subquery.
2518 - However, if this is a JOIN for a [sub]select, which is not
2519 a correlated subquery itself, but has subqueries, we can free it
2520 fully and also free JOINs of all its subqueries. The exception
2521 is a subquery in SELECT list, e.g: @n
2522 SELECT a, (select max(b) from t1) group by c @n
2523 This subquery will not be evaluated at first sweep and its value will
2524 not be inserted into the temporary table. Instead, it's evaluated
2525 when selecting from the temporary table. Therefore, it can't be freed
2526 here even though it's not correlated.
2527
2528 @todo
2529 Unlock tables even if the join isn't top level select in the tree
2530 */
2531
void JOIN::join_free()
{
  SELECT_LEX_UNIT *tmp_unit;
  SELECT_LEX *sl;
  /*
    Optimization: if not EXPLAIN and we are done with the JOIN,
    free all tables.
  */
  bool full= (!select_lex->uncacheable && !thd->lex->describe);
  bool can_unlock= full;
  DBUG_ENTER("JOIN::join_free");

  cleanup();

  // Walk all inner units/query blocks and clean up their joins too.
  for (tmp_unit= select_lex->first_inner_unit();
       tmp_unit;
       tmp_unit= tmp_unit->next_unit())
    for (sl= tmp_unit->first_select(); sl; sl= sl->next_select())
    {
      Item_subselect *subselect= sl->master_unit()->item;
      // A subquery that is not yet evaluated may run later; it must keep
      // its resources, so it blocks unlocking below.
      bool full_local= full && (!subselect || subselect->is_evaluated());
      /*
        If this join is evaluated, we can partially clean it up and clean up
        all its underlying joins even if they are correlated, only query plan
        is left in case a user will run EXPLAIN FOR CONNECTION.
        If this join is not yet evaluated, we still must clean it up to
        close its table cursors -- it may never get evaluated, as in case of
        ... HAVING FALSE OR a IN (SELECT ...))
        but all table cursors must be closed before the unlock.
      */
      sl->cleanup_all_joins();
      /* Can't unlock if at least one JOIN is still needed */
      can_unlock= can_unlock && full_local;
    }

  /*
    We are not using tables anymore
    Unlock all tables. We may be in an INSERT .... SELECT statement.
  */
  // Unlock only for the top-level query block (or the fake select of a
  // UNION), when locks exist and no LOCK TABLES mode is active.
  if (can_unlock && lock && thd->lock && ! thd->locked_tables_mode &&
      !(select_lex->active_options() & SELECT_NO_UNLOCK) &&
      !select_lex->subquery_in_having &&
      (select_lex == (thd->lex->unit->fake_select_lex ?
                      thd->lex->unit->fake_select_lex : thd->lex->select_lex)))
  {
    /*
      TODO: unlock tables even if the join isn't top level select in the
      tree.
    */
    mysql_unlock_read_tables(thd, lock);           // Don't free join->lock
    DEBUG_SYNC(thd, "after_join_free_unlock");
    lock= 0;
  }

  DBUG_VOID_RETURN;
}
2588
2589
2590 /**
2591 Free resources of given join.
2592
  @note This function performs a partial cleanup only: it closes table
        cursors and frees I/O caches and filesort buffers, and restores
        the ref item array to its original state.
2596
2597 @note
2598 With subquery this function definitely will be called several times,
2599 but even for simple query it can be called several times.
2600 */
2601
void JOIN::cleanup()
{
  DBUG_ENTER("JOIN::cleanup");

  DBUG_ASSERT(const_tables <= primary_tables &&
              primary_tables <= tables);

  /*
    The JOIN may be at different stages of query plan refinement, so its
    tables may be reachable through qep_tab (final plan), join_tab, or
    best_ref; use whichever representation exists.
  */
  if (qep_tab || join_tab || best_ref)
  {
    for (uint i= 0; i < tables; i++)
    {
      QEP_TAB *qtab;
      TABLE *table;
      QEP_operation *op;
      if (qep_tab)
      {
        DBUG_ASSERT(!join_tab);
        qtab= &qep_tab[i];
        op= qtab->op;
        table= qtab->table();
      }
      else
      {
        qtab= NULL;
        op= NULL;
        table= (join_tab ? &join_tab[i] : best_ref[i])->table();
      }
      if (!table)
        continue;
      if (table->is_created())
      {
        table->file->ha_index_or_rnd_end();
        // For a tmp-table operation, turn off the handler's write cache.
        if (op && op->type() == QEP_operation::OT_TMP_TABLE)
        {
          int tmp;
          if ((tmp= table->file->extra(HA_EXTRA_NO_CACHE)))
            table->file->print_error(tmp, MYF(0));
        }
      }
      free_io_cache(table);
      filesort_free_buffers(table, false);
    }
  }

  /* Restore ref array to original state */
  if (current_ref_ptrs != items0)
  {
    set_items_ref_array(items0);
    set_group_rpa= false;
  }
  DBUG_VOID_RETURN;
}
2654
2655
2656 /**
  Filter out ORDER BY items that are equal to constants in WHERE
2658
2659 This function is a limited version of remove_const() for use
2660 with non-JOIN statements (i.e. single-table UPDATE and DELETE).
2661
2662
2663 @param order Linked list of ORDER BY arguments
  @param where  WHERE expression
2665
2666 @return pointer to new filtered ORDER list or NULL if whole list eliminated
2667
2668 @note
2669 This function overwrites input order list.
2670 */
2671
simple_remove_const(ORDER * order,Item * where)2672 ORDER *simple_remove_const(ORDER *order, Item *where)
2673 {
2674 if (!order || !where)
2675 return order;
2676
2677 ORDER *first= NULL, *prev= NULL;
2678 for (; order; order= order->next)
2679 {
2680 DBUG_ASSERT(!order->item[0]->with_sum_func); // should never happen
2681 if (!const_expression_in_where(where, order->item[0]))
2682 {
2683 if (!first)
2684 first= order;
2685 if (prev)
2686 prev->next= order;
2687 prev= order;
2688 }
2689 }
2690 if (prev)
2691 prev->next= NULL;
2692 return first;
2693 }
2694
2695
2696
2697
2698 /*
2699 Check if equality can be used in removing components of GROUP BY/DISTINCT
2700
2701 SYNOPSIS
2702 test_if_equality_guarantees_uniqueness()
    l                 the left comparison argument (a field, if any)
    r                 the right comparison argument (a constant, if any)
2705
2706 DESCRIPTION
2707 Checks if an equality predicate can be used to take away
2708 DISTINCT/GROUP BY because it is known to be true for exactly one
2709 distinct value (e.g. <expr> == <const>).
2710 Arguments must be of the same type because e.g.
2711 <string_field> = <int_const> may match more than 1 distinct value from
2712 the column.
2713 We must take into consideration and the optimization done for various
2714 string constants when compared to dates etc (see Item_int_with_ref) as
2715 well as the collation of the arguments.
2716
2717 RETURN VALUE
2718 TRUE can be used
2719 FALSE cannot be used
2720 */
2721 static bool
test_if_equality_guarantees_uniqueness(Item * l,Item * r)2722 test_if_equality_guarantees_uniqueness(Item *l, Item *r)
2723 {
2724 return r->const_item() &&
2725 /* elements must be compared as dates */
2726 (Arg_comparator::can_compare_as_dates(l, r, 0) ||
2727 /* or of the same result type */
2728 (r->result_type() == l->result_type() &&
2729 /* and must have the same collation if compared as strings */
2730 (l->result_type() != STRING_RESULT ||
2731 l->collation.collation == r->collation.collation)));
2732 }
2733
2734
2735 /*
2736 Return TRUE if i1 and i2 (if any) are equal items,
2737 or if i1 is a wrapper item around the f2 field.
2738 */
2739
equal(Item * i1,Item * i2,Field * f2)2740 static bool equal(Item *i1, Item *i2, Field *f2)
2741 {
2742 DBUG_ASSERT((i2 == NULL) ^ (f2 == NULL));
2743
2744 if (i2 != NULL)
2745 return i1->eq(i2, 1);
2746 else if (i1->type() == Item::FIELD_ITEM)
2747 return f2->eq(((Item_field *) i1)->field);
2748 else
2749 return FALSE;
2750 }
2751
2752
2753 /**
2754 Test if a field or an item is equal to a constant value in WHERE
2755
2756 @param cond WHERE clause expression
2757 @param comp_item Item to find in WHERE expression
2758 (if comp_field != NULL)
2759 @param comp_field Field to find in WHERE expression
2760 (if comp_item != NULL)
  @param[in,out] const_item  Optional. Pass a pointer to a NULL Item*; on a
                             match it receives the constant the expression is
                             compared with, and subsequent matches must agree
                             with it.
2762
2763 @return TRUE if the field is a constant value in WHERE
2764
2765 @note
2766 comp_item and comp_field parameters are mutually exclusive.
2767 */
bool
const_expression_in_where(Item *cond, Item *comp_item, Field *comp_field,
                          Item **const_item)
{
  // Exactly one of comp_item/comp_field must be supplied.
  DBUG_ASSERT((comp_item == NULL) ^ (comp_field == NULL));

  // If the caller does not care about the constant found, collect it in a
  // local so the code below can treat const_item uniformly.
  Item *intermediate= NULL;
  if (const_item == NULL)
    const_item= &intermediate;

  if (cond->type() == Item::COND_ITEM)
  {
    /*
      AND/OR condition: recurse into the arguments.
      - For AND, one matching constant equality suffices.
      - For OR, every disjunct must pin the expression to the same constant.
    */
    bool and_level= (((Item_cond*) cond)->functype()
                     == Item_func::COND_AND_FUNC);
    List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
    Item *item;
    while ((item=li++))
    {
      bool res=const_expression_in_where(item, comp_item, comp_field,
                                         const_item);
      if (res) // Is a const value
      {
        if (and_level)
          return 1;
      }
      else if (!and_level)
        return 0;
    }
    return and_level ? 0 : 1;
  }
  else if (cond->eq_cmp_result() != Item::COND_OK)
  { // boolean compare function
    Item_func* func= (Item_func*) cond;
    // Only <=> and = can pin an expression to a single value.
    if (func->functype() != Item_func::EQUAL_FUNC &&
        func->functype() != Item_func::EQ_FUNC)
      return 0;
    Item *left_item= ((Item_func*) cond)->arguments()[0];
    Item *right_item= ((Item_func*) cond)->arguments()[1];
    if (equal(left_item, comp_item, comp_field))
    {
      // <our expression> = <right_item>: usable only if the equality
      // guarantees a single distinct value (same type/collation etc).
      if (test_if_equality_guarantees_uniqueness (left_item, right_item))
      {
        if (*const_item)
          return right_item->eq(*const_item, 1); // must be the same constant
        *const_item=right_item;
        return 1;
      }
    }
    else if (equal(right_item, comp_item, comp_field))
    {
      // <left_item> = <our expression>: mirror of the branch above.
      if (test_if_equality_guarantees_uniqueness (right_item, left_item))
      {
        if (*const_item)
          return left_item->eq(*const_item, 1);
        *const_item=left_item;
        return 1;
      }
    }
  }
  return 0;
}
2829
2830
2831 /**
2832 Update TMP_TABLE_PARAM with count of the different type of fields.
2833
2834 This function counts the number of fields, functions and sum
2835 functions (items with type SUM_FUNC_ITEM) for use by
2836 create_tmp_table() and stores it in the Temp_table_param object. It
2837 also resets and calculates the quick_group property, which may have
2838 to be reverted if this function is called after deciding to use
2839 ROLLUP (see JOIN::optimize_rollup()).
2840
2841 @param select_lex SELECT_LEX of query
2842 @param param Description of temp table
2843 @param fields List of fields to count
2844 @param reset_with_sum_func Whether to reset with_sum_func of func items
2845 @param save_sum_fields Count in the way create_tmp_table() expects when
2846 given the same parameter.
2847 */
2848
void
count_field_types(SELECT_LEX *select_lex, Temp_table_param *param,
                  List<Item> &fields, bool reset_with_sum_func,
                  bool save_sum_fields)
{
  List_iterator<Item> li(fields);
  Item *field;

  // Reset all counters; this function may be called repeatedly.
  param->field_count= 0;
  param->sum_func_count= 0;
  param->func_count= 0;
  param->hidden_field_count= 0;
  param->outer_sum_func_count= 0;
  param->quick_group=1;
  /*
    Loose index scan guarantees that all grouping is done and MIN/MAX
    functions are computed, so create_tmp_table() treats this as if
    save_sum_fields is set.
  */
  save_sum_fields|= param->precomputed_group_by;

  while ((field=li++))
  {
    Item::Type real_type= field->real_item()->type();
    if (real_type == Item::FIELD_ITEM)
      param->field_count++;
    else if (real_type == Item::SUM_FUNC_ITEM)
    {
      if (! field->const_item())
      {
        Item_sum *sum_item=(Item_sum*) field->real_item();
        // Count only aggregates belonging to this query block, not
        // aggregates resolved in an outer query block.
        if (!sum_item->depended_from() ||
            sum_item->depended_from() == select_lex)
        {
          if (!sum_item->quick_group)
            param->quick_group=0;			// UDF SUM function
          param->sum_func_count++;

          // Arguments of the aggregate are materialized too; classify each
          // as a plain field or a function result.
          for (uint i=0 ; i < sum_item->get_arg_count() ; i++)
          {
            if (sum_item->get_arg(i)->real_item()->type() == Item::FIELD_ITEM)
              param->field_count++;
            else
              param->func_count++;
          }
        }
        param->func_count++;
      }
      else if (save_sum_fields)
      {
        /*
          Count the way create_tmp_table() does if asked to preserve
          Item_sum_* functions in fields list.

          Item field is an Item_sum_* or a reference to such an
          item. We need to distinguish between these two cases since
          they are treated differently by create_tmp_table().
        */
        if (field->type() == Item::SUM_FUNC_ITEM) // An Item_sum_*
          param->field_count++;
        else // A reference to an Item_sum_*
        {
          param->func_count++;
          param->sum_func_count++;
        }
      }
    }
    else
    {
      // Any other expression (function, literal, subquery reference, ...).
      param->func_count++;
      // NOTE: when reset_with_sum_func is set, the flag is cleared before
      // it is tested, so such items never count as outer sum funcs.
      if (reset_with_sum_func)
        field->with_sum_func=0;
      if (field->with_sum_func)
        param->outer_sum_func_count++;
    }
  }
}
2926
2927
2928 /**
2929 Return 1 if second is a subpart of first argument.
2930
2931 If first parts has different direction, change it to second part
2932 (group is sorted like order)
2933 */
2934
2935 bool
test_if_subpart(ORDER * a,ORDER * b)2936 test_if_subpart(ORDER *a,ORDER *b)
2937 {
2938 for (; a && b; a=a->next,b=b->next)
2939 {
2940 if ((*a->item)->eq(*b->item,1))
2941 a->direction= b->direction;
2942 else
2943 return 0;
2944 }
2945 return MY_TEST(!b);
2946 }
2947
2948 /**
2949 calc how big buffer we need for comparing group entries.
2950 */
2951
void
calc_group_buffer(JOIN *join,ORDER *group)
{
  uint key_length=0, parts=0, null_parts=0;

  if (group)
    join->grouped= true;
  // Sum up the worst-case key length of every GROUP BY element.
  for (; group ; group=group->next)
  {
    Item *group_item= *group->item;
    Field *field= group_item->get_tmp_table_field();
    if (field)
    {
      // The item is backed by a tmp-table field: size from the field type.
      enum_field_types type;
      if ((type= field->type()) == MYSQL_TYPE_BLOB)
        key_length+=MAX_BLOB_WIDTH;		// Can't be used as a key
      else if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_VAR_STRING)
        key_length+= field->field_length + HA_KEY_BLOB_LENGTH;
      else if (type == MYSQL_TYPE_BIT)
      {
        /* Bit is usually stored as a longlong key for group fields */
        key_length+= 8;                         // Big enough
      }
      else
        key_length+= field->pack_length();
    }
    else
    {
      // No backing field yet: size from the item's result type.
      switch (group_item->result_type()) {
      case REAL_RESULT:
        key_length+= sizeof(double);
        break;
      case INT_RESULT:
        key_length+= sizeof(longlong);
        break;
      case DECIMAL_RESULT:
        key_length+= my_decimal_get_binary_size(group_item->max_length -
                                                (group_item->decimals ? 1 : 0),
                                                group_item->decimals);
        break;
      case STRING_RESULT:
      {
        /*
          As items represented as DATE/TIME fields in the group buffer
          have STRING_RESULT result type, we increase the length
          by 8 as maximum pack length of such fields.
        */
        if (group_item->is_temporal())
        {
          key_length+= 8;
        }
        else if (group_item->field_type() == MYSQL_TYPE_BLOB)
          key_length+= MAX_BLOB_WIDTH;		// Can't be used as a key
        else
        {
          /*
            Group strings are taken as varstrings and require an length field.
            A field is not yet created by create_tmp_field()
            and the sizes should match up.
          */
          key_length+= group_item->max_length + HA_KEY_BLOB_LENGTH;
        }
        break;
      }
      default:
        /* This case should never be chosen */
        DBUG_ASSERT(0);
        my_error(ER_OUT_OF_RESOURCES, MYF(ME_FATALERROR));
      }
    }
    parts++;
    // Each nullable part needs one extra byte for the NULL flag.
    if (group_item->maybe_null)
      null_parts++;
  }
  join->tmp_table_param.group_length=key_length+null_parts;
  join->tmp_table_param.group_parts=parts;
  join->tmp_table_param.group_null_parts=null_parts;
}
3030
3031
3032
3033 /**
3034 Make an array of pointers to sum_functions to speed up
3035 sum_func calculation.
3036
3037 @retval
3038 0 ok
3039 @retval
3040 1 Error
3041 */
3042
alloc_func_list()3043 bool JOIN::alloc_func_list()
3044 {
3045 uint func_count, group_parts;
3046 DBUG_ENTER("alloc_func_list");
3047
3048 func_count= tmp_table_param.sum_func_count;
3049 /*
3050 If we are using rollup, we need a copy of the summary functions for
3051 each level
3052 */
3053 if (rollup.state != ROLLUP::STATE_NONE)
3054 func_count*= (send_group_parts+1);
3055
3056 group_parts= send_group_parts;
3057 /*
3058 If distinct, reserve memory for possible
3059 disctinct->group_by optimization
3060 */
3061 if (select_distinct)
3062 {
3063 group_parts+= fields_list.elements;
3064 /*
3065 If the ORDER clause is specified then it's possible that
3066 it also will be optimized, so reserve space for it too
3067 */
3068 if (order)
3069 {
3070 ORDER *ord;
3071 for (ord= order; ord; ord= ord->next)
3072 group_parts++;
3073 }
3074 }
3075
3076 /* This must use calloc() as rollup_make_fields depends on this */
3077 sum_funcs= (Item_sum**) thd->mem_calloc(sizeof(Item_sum**) * (func_count+1) +
3078 sizeof(Item_sum***) * (group_parts+1));
3079 sum_funcs_end= (Item_sum***) (sum_funcs + func_count + 1);
3080 DBUG_RETURN(sum_funcs == 0);
3081 }
3082
3083
3084 /**
3085 Initialize 'sum_funcs' array with all Item_sum objects.
3086
3087 @param field_list All items
3088 @param send_result_set_metadata Items in select list
3089 @param before_group_by Set to 1 if this is called before GROUP BY handling
3090 @param recompute Set to TRUE if sum_funcs must be recomputed
3091
3092 @retval
3093 0 ok
3094 @retval
3095 1 error
3096 */
3097
make_sum_func_list(List<Item> & field_list,List<Item> & send_result_set_metadata,bool before_group_by,bool recompute)3098 bool JOIN::make_sum_func_list(List<Item> &field_list,
3099 List<Item> &send_result_set_metadata,
3100 bool before_group_by, bool recompute)
3101 {
3102 List_iterator_fast<Item> it(field_list);
3103 Item_sum **func;
3104 Item *item;
3105 DBUG_ENTER("make_sum_func_list");
3106
3107 if (*sum_funcs && !recompute)
3108 DBUG_RETURN(FALSE); /* We have already initialized sum_funcs. */
3109
3110 func= sum_funcs;
3111 while ((item=it++))
3112 {
3113 if (item->type() == Item::SUM_FUNC_ITEM && !item->const_item() &&
3114 (!((Item_sum*) item)->depended_from() ||
3115 ((Item_sum *)item)->depended_from() == select_lex))
3116 *func++= (Item_sum*) item;
3117 }
3118 if (before_group_by && rollup.state == ROLLUP::STATE_INITED)
3119 {
3120 rollup.state= ROLLUP::STATE_READY;
3121 if (rollup_make_fields(field_list, send_result_set_metadata, &func))
3122 DBUG_RETURN(TRUE); // Should never happen
3123 }
3124 else if (rollup.state == ROLLUP::STATE_NONE)
3125 {
3126 for (uint i=0 ; i <= send_group_parts ;i++)
3127 sum_funcs_end[i]= func;
3128 }
3129 else if (rollup.state == ROLLUP::STATE_READY)
3130 DBUG_RETURN(FALSE); // Don't put end marker
3131 *func=0; // End marker
3132 DBUG_RETURN(FALSE);
3133 }
3134
3135
3136 /**
3137 Free joins of subselect of this select.
3138
3139 @param thd THD pointer
3140 @param select pointer to st_select_lex which subselects joins we will free
3141 */
3142
free_underlaid_joins(THD * thd,SELECT_LEX * select)3143 void free_underlaid_joins(THD *thd, SELECT_LEX *select)
3144 {
3145 for (SELECT_LEX_UNIT *unit= select->first_inner_unit();
3146 unit;
3147 unit= unit->next_unit())
3148 unit->cleanup(false);
3149 }
3150
3151 /****************************************************************************
3152 ROLLUP handling
3153 ****************************************************************************/
3154
3155 /**
3156 Wrap all constant Items in GROUP BY list.
3157
3158 For ROLLUP queries each constant item referenced in GROUP BY list
3159 is wrapped up into an Item_func object yielding the same value
3160 as the constant item. The objects of the wrapper class are never
3161 considered as constant items and besides they inherit all
3162 properties of the Item_result_field class.
3163 This wrapping allows us to ensure writing constant items
3164 into temporary tables whenever the result of the ROLLUP
3165 operation has to be written into a temporary table, e.g. when
3166 ROLLUP is used together with DISTINCT in the SELECT list.
3167 Usually when creating temporary tables for a intermidiate
3168 result we do not include fields for constant expressions.
3169
3170 @retval
3171 0 if ok
3172 @retval
3173 1 on error
3174 */
3175
rollup_process_const_fields()3176 bool JOIN::rollup_process_const_fields()
3177 {
3178 ORDER *group_tmp;
3179 Item *item;
3180 List_iterator<Item> it(all_fields);
3181
3182 for (group_tmp= group_list; group_tmp; group_tmp= group_tmp->next)
3183 {
3184 if (!(*group_tmp->item)->const_item())
3185 continue;
3186 while ((item= it++))
3187 {
3188 if (*group_tmp->item == item)
3189 {
3190 Item* new_item= new Item_func_rollup_const(item);
3191 if (!new_item)
3192 return 1;
3193 new_item->fix_fields(thd, (Item **) 0);
3194 thd->change_item_tree(it.ref(), new_item);
3195 for (ORDER *tmp= group_tmp; tmp; tmp= tmp->next)
3196 {
3197 if (*tmp->item == item)
3198 thd->change_item_tree(tmp->item, new_item);
3199 }
3200 break;
3201 }
3202 }
3203 it.rewind();
3204 }
3205 return 0;
3206 }
3207
3208
/**
  Fill up rollup structures with pointers to fields to use.

  Creates copies of item_sum items for each sum level.

  @param fields_arg			List of all fields (hidden and real ones)
  @param sel_fields		Pointer to selected fields
  @param func			Store here a pointer to all fields

  @retval
    0	if ok;
    In this case func is pointing to next not used element.
  @retval
    1    on error
*/

bool JOIN::rollup_make_fields(List<Item> &fields_arg, List<Item> &sel_fields,
			      Item_sum ***func)
{
  List_iterator_fast<Item> it(fields_arg);
  Item *first_field= sel_fields.head();
  uint level;

  /*
    Create field lists for the different levels

    The idea here is to have a separate field list for each rollup level to
    avoid all runtime checks of which columns should be NULL.

    The list is stored in reverse order to get sum function in such an order
    in func that it makes it easy to reset them with init_sum_functions()

    Assuming:  SELECT a, b, c SUM(b) FROM t1 GROUP BY a,b WITH ROLLUP

    rollup.fields[0] will contain list where a,b,c is NULL
    rollup.fields[1] will contain list where b,c is NULL
    ...
    rollup.ref_pointer_array[#] points to fields for rollup.fields[#]
    ...
    sum_funcs_end[0] points to all sum functions
    sum_funcs_end[1] points to all sum functions, except grand totals
    ...
  */

  for (level=0 ; level < send_group_parts ; level++)
  {
    uint i;
    /* Levels are built in reverse: pos counts down from the last level. */
    uint pos= send_group_parts - level -1;
    bool real_fields= 0;
    Item *item;
    List_iterator<Item> new_it(rollup.fields[pos]);
    Ref_ptr_array ref_array_start= rollup.ref_pointer_arrays[pos];
    ORDER *start_group;

    /* Point to first hidden field */
    uint ref_array_ix= fields_arg.elements-1;

    /* Remember where the sum functions ends for the previous level */
    sum_funcs_end[pos+1]= *func;

    /* Find the start of the group for this level */
    for (i= 0, start_group= group_list ;
         i++ < pos ;
         start_group= start_group->next)
      ;

    it.rewind();
    while ((item= it++))
    {
      if (item == first_field)
      {
        real_fields= 1;				// End of hidden fields
        ref_array_ix= 0;
      }

      /* Aggregates evaluated in this query block get a per-level copy. */
      if (item->type() == Item::SUM_FUNC_ITEM && !item->const_item() &&
          (!((Item_sum*) item)->depended_from() ||
           ((Item_sum *)item)->depended_from() == select_lex))

      {
	/*
	  This is a top level summary function that must be replaced with
	  a sum function that is reset for this level.

	  NOTE: This code creates an object which is not that nice in a
	  sub select.  Fortunately it's not common to have rollup in
	  sub selects.
	*/
	item= item->copy_or_same(thd);
	((Item_sum*) item)->make_unique();
	*(*func)= (Item_sum*) item;
	(*func)++;
      }
      else
      {
	/* Check if this is something that is part of this group by */
	ORDER *group_tmp;
	for (group_tmp= start_group, i= pos ;
             group_tmp ; group_tmp= group_tmp->next, i++)
	{
          if (*group_tmp->item == item)
	  {
	    /*
	      This is an element that is used by the GROUP BY and should be
	      set to NULL in this level
	    */
            Item_null_result *null_item=
              new (thd->mem_root) Item_null_result(item->field_type(),
                                                   item->result_type());
            if (!null_item)
              return 1;
	    item->maybe_null= 1;		// Value will be null sometimes
            null_item->result_field= item->get_tmp_table_field();
            item= null_item;
	    break;
	  }
	}
      }
      /* Hidden fields fill the array from the top down, real ones bottom up. */
      ref_array_start[ref_array_ix]= item;
      if (real_fields)
      {
	(void) new_it++;			// Point to next item
	new_it.replace(item);			// Replace previous
	ref_array_ix++;
      }
      else
	ref_array_ix--;
    }
  }
  sum_funcs_end[0]= *func;			// Point to last function
  return 0;
}
3341
3342
3343 /**
3344 clear results if there are not rows found for group
3345 (end_send_group/end_write_group)
3346 @retval
3347 FALSE if OK
3348 @retval
3349 TRUE on error
3350 */
3351
clear()3352 bool JOIN::clear()
3353 {
3354 /*
3355 must clear only the non-const tables, as const tables
3356 are not re-calculated.
3357 */
3358 for (uint tableno= const_tables; tableno < primary_tables; tableno++)
3359 qep_tab[tableno].table()->set_null_row(); // All fields are NULL
3360
3361 if (copy_fields(&tmp_table_param, thd))
3362 return true;
3363
3364 if (sum_funcs)
3365 {
3366 Item_sum *func, **func_ptr= sum_funcs;
3367 while ((func= *(func_ptr++)))
3368 func->clear();
3369 }
3370 return false;
3371 }
3372
3373
3374 /**
3375 Change the Query_result object of the query block.
3376
3377 If old_result is not used, forward the call to the current
3378 Query_result in case it is a wrapper around old_result.
3379
3380 Call prepare() and prepare2() on the new Query_result if we decide
3381 to use it.
3382
3383 @param new_result New Query_result object
3384 @param old_result Old Query_result object (NULL to force change)
3385
3386 @retval false Success
3387 @retval true Error
3388 */
3389
change_query_result(Query_result_interceptor * new_result,Query_result_interceptor * old_result)3390 bool SELECT_LEX::change_query_result(Query_result_interceptor *new_result,
3391 Query_result_interceptor *old_result)
3392 {
3393 DBUG_ENTER("SELECT_LEX::change_query_result");
3394 if (old_result == NULL || query_result() == old_result)
3395 {
3396 set_query_result(new_result);
3397 if (query_result()->prepare(fields_list, master_unit()) ||
3398 query_result()->prepare2())
3399 DBUG_RETURN(true); /* purecov: inspected */
3400 DBUG_RETURN(false);
3401 }
3402 else
3403 {
3404 const bool ret= query_result()->change_query_result(new_result);
3405 DBUG_RETURN(ret);
3406 }
3407 }
3408
3409 /**
3410 Add having condition as a filter condition, which is applied when reading
3411 from the temp table.
3412
3413 @param curr_tmp_table Table number to which having conds are added.
3414 @returns false if success, true if error.
3415 */
3416
add_having_as_tmp_table_cond(uint curr_tmp_table)3417 bool JOIN::add_having_as_tmp_table_cond(uint curr_tmp_table)
3418 {
3419 having_cond->update_used_tables();
3420 QEP_TAB *const curr_table= &qep_tab[curr_tmp_table];
3421 table_map used_tables;
3422 Opt_trace_context *const trace= &thd->opt_trace;
3423
3424 DBUG_ENTER("JOIN::add_having_as_tmp_table_cond");
3425
3426 if (curr_table->table_ref)
3427 used_tables= curr_table->table_ref->map();
3428 else
3429 {
3430 /*
3431 Pushing parts of HAVING to an internal temporary table.
3432 Fields in HAVING condition may have been replaced with fields in an
3433 internal temporary table. This table has map=1, hence we check that
3434 we have no fields from other tables (outer references are fine).
3435 Unfortunaly, update_used_tables() is not reliable for subquery
3436 items, which could thus still have other tables in their
3437 used_tables() information.
3438 */
3439 DBUG_ASSERT(having_cond->has_subquery() ||
3440 !(having_cond->used_tables() & ~(1 | PSEUDO_TABLE_BITS)));
3441 used_tables= 1;
3442 }
3443
3444 /*
3445 All conditions which can be applied after reading from used_tables are
3446 added as filter conditions of curr_tmp_table. If condition's used_tables is
3447 not read yet for example subquery in having, then it will be kept as it is
3448 in original having_cond of join.
3449 */
3450 Item* sort_table_cond= make_cond_for_table(having_cond, used_tables,
3451 (table_map) 0, false);
3452 if (sort_table_cond)
3453 {
3454 if (!curr_table->condition())
3455 curr_table->set_condition(sort_table_cond);
3456 else
3457 {
3458 curr_table->set_condition(new Item_cond_and(curr_table->condition(),
3459 sort_table_cond));
3460 if (curr_table->condition()->fix_fields(thd, 0))
3461 DBUG_RETURN(true);
3462 }
3463 curr_table->condition()->top_level_item();
3464 DBUG_EXECUTE("where",print_where(curr_table->condition(),
3465 "select and having",
3466 QT_ORDINARY););
3467
3468 having_cond= make_cond_for_table(having_cond, ~ (table_map) 0,
3469 ~used_tables, false);
3470 DBUG_EXECUTE("where",
3471 print_where(having_cond, "having after sort",
3472 QT_ORDINARY););
3473
3474 Opt_trace_object trace_wrapper(trace);
3475 Opt_trace_object(trace, "sort_using_internal_table")
3476 .add("condition_for_sort", sort_table_cond)
3477 .add("having_after_sort", having_cond);
3478 }
3479
3480 DBUG_RETURN(false);
3481 }
3482
3483
/**
  Init tmp tables usage info.

  @details
  This function finalizes execution plan by taking following actions:
    .) tmp tables are created, but not instantiated (this is done during
       execution). JOIN_TABs dedicated to tmp tables are filled appropriately.
       see JOIN::create_intermediate_table.
    .) prepare fields lists (fields, all_fields, ref_pointer_array slices) for
       each required stage of execution. These fields lists are set for
       tmp tables' tabs and for the tab of last table in the join.
    .) fill info for sorting/grouping/dups removal is prepared and saved to
       appropriate tabs. Here is an example:
        SELECT * from t1,t2 WHERE ... GROUP BY t1.f1 ORDER BY t2.f2, t1.f2
        and lets assume that the table order in the plan is t1,t2.
       In this case optimizer will sort for group only the first table as the
       second one isn't mentioned in GROUP BY. The result will be materialized
       in tmp table.  As filesort can't sort join optimizer will sort tmp table
       also. The first sorting (for group) is called simple as is doesn't
       require tmp table.  The Filesort object for it is created here - in
       JOIN::create_intermediate_table.  Filesort for the second case is
       created here, in JOIN::make_tmp_tables_info.

  @note
  This function may change tmp_table_param.precomputed_group_by. This
  affects how create_tmp_table() treats aggregation functions, so
  count_field_types() must be called again to make sure this is taken
  into consideration.

  @returns
  false - Ok
  true  - Error
*/

bool JOIN::make_tmp_tables_info()
{
  DBUG_ASSERT(!join_tab);
  List<Item> *curr_all_fields= &all_fields;
  List<Item> *curr_fields_list= &fields_list;
  bool materialize_join= false;
  uint curr_tmp_table= const_tables;
  TABLE *exec_tmp_table= NULL;
  DBUG_ENTER("JOIN::make_tmp_tables_info");

  /*
    In this function, we may change having_cond into a condition on a
    temporary sort/group table, so we have to assign having_for_explain now:
  */
  having_for_explain= having_cond;

  // Saved: 'grouped' is cleared/restored while tmp tables are set up below.
  const bool has_group_by= this->grouped;

  /*
    Setup last table to provide fields and all_fields lists to the next
    node in the plan.
  */
  if (qep_tab)
  {
    qep_tab[primary_tables - 1].fields= &fields_list;
    qep_tab[primary_tables - 1].all_fields= &all_fields;
  }
  /*
    The loose index scan access method guarantees that all grouping or
    duplicate row elimination (for distinct) is already performed
    during data retrieval, and that all MIN/MAX functions are already
    computed for each group. Thus all MIN/MAX functions should be
    treated as regular functions, and there is no need to perform
    grouping in the main execution loop.
    Notice that currently loose index scan is applicable only for
    single table queries, thus it is sufficient to test only the first
    join_tab element of the plan for its access method.
  */
  if (qep_tab && qep_tab[0].quick() && qep_tab[0].quick()->is_loose_index_scan())
    tmp_table_param.precomputed_group_by=
      !qep_tab[0].quick()->is_agg_loose_index_scan();

  /* Create a tmp table if distinct or if the sort is too complicated */
  if (need_tmp)
  {
    curr_tmp_table= primary_tables;
    tmp_tables++;
    if (plan_is_const())
      first_select= sub_select_op;

    /*
      Create temporary table on first execution of this join.
      (Will be reused if this is a subquery that is executed several times.)
      Don't use tmp table grouping for json aggregate funcs as it's
      very ineffective.
    */
    init_items_ref_array();

    ORDER_with_src tmp_group;
    if (!simple_group && !(test_flags & TEST_NO_KEY_GROUP) && !with_json_agg)
      tmp_group= group_list;

    tmp_table_param.hidden_field_count=
      all_fields.elements - fields_list.elements;

    if (create_intermediate_table(&qep_tab[curr_tmp_table],
                                  &all_fields, tmp_group,
                                  group_list && simple_group))
      DBUG_RETURN(true);
    exec_tmp_table= qep_tab[curr_tmp_table].table();

    if (exec_tmp_table->distinct)
      optimize_distinct();

    /*
      If there is no sorting or grouping, 'use_order'
      index result should not have been requested.
      Exception: LooseScan strategy for semijoin requires
      sorted access even if final result is not to be sorted.
    */
    DBUG_ASSERT(
      !(ordered_index_usage == ordered_index_void &&
        !plan_is_const() &&
        qep_tab[const_tables].position()->sj_strategy != SJ_OPT_LOOSE_SCAN &&
        qep_tab[const_tables].use_order()));

    /* Change sum_fields reference to calculated fields in tmp_table */
    DBUG_ASSERT(items1.is_null());
    items1= select_lex->ref_ptr_array_slice(2);
    if (sort_and_group || qep_tab[curr_tmp_table].table()->group ||
        tmp_table_param.precomputed_group_by)
    {
      if (change_to_use_tmp_fields(thd, items1,
                                   tmp_fields_list1, tmp_all_fields1,
                                   fields_list.elements, all_fields))
        DBUG_RETURN(true);
    }
    else
    {
      if (change_refs_to_tmp_fields(thd, items1,
                                    tmp_fields_list1, tmp_all_fields1,
                                    fields_list.elements, all_fields))
        DBUG_RETURN(true);
    }
    curr_all_fields= &tmp_all_fields1;
    curr_fields_list= &tmp_fields_list1;
    // Need to set them now for correct group_fields setup, reset at the end.
    set_items_ref_array(items1);
    qep_tab[curr_tmp_table].ref_array= &items1;
    qep_tab[curr_tmp_table].all_fields= &tmp_all_fields1;
    qep_tab[curr_tmp_table].fields= &tmp_fields_list1;
    setup_tmptable_write_func(&qep_tab[curr_tmp_table]);

    /*
      If having is not handled here, it will be checked before the row is sent
      to the client.
    */
    if (having_cond &&
        (sort_and_group || (exec_tmp_table->distinct && !group_list)))
    {
      /*
        If there is no select distinct then move the having to table conds of
        tmp table.
        NOTE : We cannot apply having after distinct. If columns of having are
               not part of select distinct, then distinct may remove rows
               which can satisfy having.
      */
      if (!select_distinct && add_having_as_tmp_table_cond(curr_tmp_table))
        DBUG_RETURN(true);

      /*
        Having condition which we are not able to add as tmp table conds are
        kept as before. And, this will be applied before storing the rows in
        tmp table.
      */
      qep_tab[curr_tmp_table].having= having_cond;
      having_cond= NULL; // Already done
    }

    tmp_table_param.func_count= 0;
    /*
      NOTE(review): func_count was zeroed on the line above, so this
      addition is a no-op. Presumably the intent (cf. the
      sum_func_count handling just below and near the end of this
      branch) was to fold func_count into field_count before zeroing
      it — confirm against upstream before changing.
    */
    tmp_table_param.field_count+= tmp_table_param.func_count;
    if (sort_and_group || qep_tab[curr_tmp_table].table()->group)
    {
      tmp_table_param.field_count+= tmp_table_param.sum_func_count;
      tmp_table_param.sum_func_count= 0;
    }

    if (exec_tmp_table->group)
    {						// Already grouped
      if (!order && !no_order && !skip_sort_order)
        order= group_list;  /* order by group */
      group_list= NULL;
    }
    /*
      If we have different sort & group then we must sort the data by group
      and copy it to another tmp table
      This code is also used if we are using distinct something
      we haven't been able to store in the temporary table yet
      like SEC_TO_TIME(SUM(...)).
    */

    if ((group_list &&
         (!test_if_subpart(group_list, order) || select_distinct)) ||
        (select_distinct && tmp_table_param.using_outer_summary_function))
    {					/* Must copy to another table */
      DBUG_PRINT("info",("Creating group table"));

      calc_group_buffer(this, group_list);
      count_field_types(select_lex, &tmp_table_param, tmp_all_fields1,
                        select_distinct && !group_list, false);
      tmp_table_param.hidden_field_count=
        tmp_all_fields1.elements - tmp_fields_list1.elements;
      sort_and_group= false;
      if (!exec_tmp_table->group && !exec_tmp_table->distinct)
      {
        // 1st tmp table were materializing join result
        materialize_join= true;
        explain_flags.set(ESC_BUFFER_RESULT, ESP_USING_TMPTABLE);
      }
      curr_tmp_table++;
      tmp_tables++;

      /* group data to new table */
      /*
        If the access method is loose index scan then all MIN/MAX
        functions are precomputed, and should be treated as regular
        functions. See extended comment above.
      */
      if (qep_tab[0].quick() && qep_tab[0].quick()->is_loose_index_scan())
        tmp_table_param.precomputed_group_by= TRUE;

      tmp_table_param.hidden_field_count=
        curr_all_fields->elements - curr_fields_list->elements;
      ORDER_with_src dummy= NULL; //TODO can use table->group here also

      if (create_intermediate_table(&qep_tab[curr_tmp_table],
                                    curr_all_fields, dummy, true))
        DBUG_RETURN(true);

      if (group_list)
      {
        explain_flags.set(group_list.src, ESP_USING_TMPTABLE);
        if (!plan_is_const())        // No need to sort a single row
        {
          // Sorting happens on the table feeding this second tmp table.
          if (add_sorting_to_table(curr_tmp_table - 1, &group_list))
            DBUG_RETURN(true);
        }

        if (make_group_fields(this, this))
          DBUG_RETURN(true);
      }

      // Setup sum funcs only when necessary, otherwise we might break info
      // for the first table
      if (group_list || tmp_table_param.sum_func_count)
      {
        if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true, true))
          DBUG_RETURN(true);
        const bool need_distinct=
          !(qep_tab[0].quick() && qep_tab[0].quick()->is_agg_loose_index_scan());
        if (prepare_sum_aggregators(sum_funcs, need_distinct))
          DBUG_RETURN(true);
        group_list= NULL;
        if (setup_sum_funcs(thd, sum_funcs))
          DBUG_RETURN(true);
      }
      // No sum funcs anymore
      DBUG_ASSERT(items2.is_null());

      items2= select_lex->ref_ptr_array_slice(3);
      if (change_to_use_tmp_fields(thd, items2,
                                   tmp_fields_list2, tmp_all_fields2,
                                   fields_list.elements, tmp_all_fields1))
        DBUG_RETURN(true);

      curr_fields_list= &tmp_fields_list2;
      curr_all_fields= &tmp_all_fields2;
      set_items_ref_array(items2);
      qep_tab[curr_tmp_table].ref_array= &items2;
      qep_tab[curr_tmp_table].all_fields= &tmp_all_fields2;
      qep_tab[curr_tmp_table].fields= &tmp_fields_list2;
      setup_tmptable_write_func(&qep_tab[curr_tmp_table]);

      tmp_table_param.field_count+= tmp_table_param.sum_func_count;
      tmp_table_param.sum_func_count= 0;
    }
    if (qep_tab[curr_tmp_table].table()->distinct)
      select_distinct= false;               /* Each row is unique */

    if (select_distinct && !group_list)
    {
      if (having_cond)
      {
        qep_tab[curr_tmp_table].having= having_cond;
        having_cond->update_used_tables();
        having_cond= NULL;
      }
      qep_tab[curr_tmp_table].distinct= true;
      explain_flags.set(ESC_DISTINCT, ESP_DUPS_REMOVAL);
      select_distinct= false;
    }
    /* Clean tmp_table_param for the next tmp table. */
    tmp_table_param.field_count= tmp_table_param.sum_func_count=
      tmp_table_param.func_count= 0;

    tmp_table_param.copy_field= tmp_table_param.copy_field_end=0;
    first_record= sort_and_group=0;

    if (!group_optimized_away)
    {
      grouped= false;
    }
    else
    {
      /*
        If grouping has been optimized away, a temporary table is
        normally not needed unless we're explicitly requested to create
        one (e.g. due to a SQL_BUFFER_RESULT hint or INSERT ... SELECT).

        In this case (grouping was optimized away), temp_table was
        created without a grouping expression and JOIN::exec() will not
        perform the necessary grouping (by the use of end_send_group()
        or end_write_group()) if JOIN::group is set to false.
      */
      // the temporary table was explicitly requested
      DBUG_ASSERT(select_lex->active_options() & OPTION_BUFFER_RESULT);
      // the temporary table does not have a grouping expression
      DBUG_ASSERT(!qep_tab[curr_tmp_table].table()->group);
    }
    calc_group_buffer(this, group_list);
    count_field_types(select_lex, &tmp_table_param, *curr_all_fields, false,
                      false);
  }

  if (grouped || implicit_grouping || tmp_table_param.sum_func_count)
  {
    if (make_group_fields(this, this))
      DBUG_RETURN(true);

    DBUG_ASSERT(items3.is_null());

    if (items0.is_null())
      init_items_ref_array();
    items3= select_lex->ref_ptr_array_slice(4);
    setup_copy_fields(thd, &tmp_table_param,
                      items3, tmp_fields_list3, tmp_all_fields3,
                      curr_fields_list->elements, *curr_all_fields);

    curr_fields_list= &tmp_fields_list3;
    curr_all_fields= &tmp_all_fields3;
    set_items_ref_array(items3);
    if (qep_tab)
    {
      // Set grouped fields on the last table
      qep_tab[primary_tables + tmp_tables - 1].ref_array= &items3;
      qep_tab[primary_tables + tmp_tables - 1].all_fields= &tmp_all_fields3;
      qep_tab[primary_tables + tmp_tables - 1].fields= &tmp_fields_list3;
    }
    if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true, true))
      DBUG_RETURN(true);
    const bool need_distinct=
      !(qep_tab && qep_tab[0].quick() && qep_tab[0].quick()->is_agg_loose_index_scan());
    if (prepare_sum_aggregators(sum_funcs, need_distinct))
      DBUG_RETURN(true);
    if (setup_sum_funcs(thd, sum_funcs) || thd->is_fatal_error)
      DBUG_RETURN(true);
  }
  if (qep_tab && (group_list || order))
  {
    ASSERT_BEST_REF_IN_JOIN_ORDER(this);
    DBUG_PRINT("info",("Sorting for send_result_set_metadata"));
    THD_STAGE_INFO(thd, stage_sorting_result);
    /* If we have already done the group, add HAVING to sorted table */
    if (having_cond && !group_list && !sort_and_group)
    {
      if (add_having_as_tmp_table_cond(curr_tmp_table))
        DBUG_RETURN(true);
    }

    if (grouped)
      m_select_limit= HA_POS_ERROR;
    else if (!need_tmp)
    {
      /*
        We can abort sorting after thd->select_limit rows if there are no
        filter conditions for any tables after the sorted one.
        Filter conditions come in several forms:
         1. as a condition item attached to the join_tab, or
         2. as a keyuse attached to the join_tab (ref access).
      */
      for (uint i= const_tables + 1; i < primary_tables; i++)
      {
        QEP_TAB *const tab= qep_tab + i;
        if (tab->condition() ||                                // 1
            (best_ref[tab->idx()]->keyuse() &&
             tab->first_inner() == NO_PLAN_IDX))               // 2
        {
          /* We have to sort all rows */
          m_select_limit= HA_POS_ERROR;
          break;
        }
      }
    }
    /*
      Here we add sorting stage for ORDER BY/GROUP BY clause, if the
      optimiser chose FILESORT to be faster than INDEX SCAN or there is
      no suitable index present.
      OPTION_FOUND_ROWS supersedes LIMIT and is taken into account.
    */
    DBUG_PRINT("info",("Sorting for order by/group by"));
    ORDER_with_src order_arg= group_list ? group_list : order;
    if (qep_tab &&
        ordered_index_usage !=
        (group_list ? ordered_index_group_by : ordered_index_order_by) &&
        qep_tab[curr_tmp_table].type() != JT_CONST &&
        qep_tab[curr_tmp_table].type() != JT_EQ_REF) // Don't sort 1 row
    {
      // Sort either first non-const table or the last tmp table
      QEP_TAB *const sort_tab= &qep_tab[curr_tmp_table];
      if (need_tmp && !materialize_join && !exec_tmp_table->group)
        explain_flags.set(order_arg.src, ESP_USING_TMPTABLE);

      if (add_sorting_to_table(curr_tmp_table, &order_arg))
        DBUG_RETURN(true);
      /*
        filesort_limit:	 Return only this many rows from filesort().
        We can use select_limit_cnt only if we have no group_by and 1 table.
        This allows us to use Bounded_queue for queries like:
          "select SQL_CALC_FOUND_ROWS * from t1 order by b desc limit 1;"
        m_select_limit == HA_POS_ERROR (we need a full table scan)
        unit->select_limit_cnt == 1 (we only need one row in the result set)
      */
      sort_tab->filesort->limit=
        (has_group_by || (primary_tables > curr_tmp_table + 1)) ?
         m_select_limit : unit->select_limit_cnt;
    }
    if (!plan_is_const() &&
        !qep_tab[const_tables].table()->sort.io_cache)
    {
      /*
        If no IO cache exists for the first table then we are using an
        INDEX SCAN and no filesort. Thus we should not remove the sorted
        attribute on the INDEX SCAN.
      */
      skip_sort_order= true;
    }
  }
  fields= curr_fields_list;
  // Reset before execution
  set_items_ref_array(items0);
  if (qep_tab)
  {
    qep_tab[primary_tables + tmp_tables - 1].next_select=
      get_end_select_func();
  }
  // Restore the grouping flag saved at function entry.
  grouped= has_group_by;

  unplug_join_tabs();

  DBUG_RETURN(false);
}
3939
unplug_join_tabs()3940 void JOIN::unplug_join_tabs()
3941 {
3942 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
3943 for (uint i= 0; i < tables; ++i)
3944 best_ref[i]->cleanup();
3945
3946 best_ref= NULL;
3947 }
3948
/**
  @brief Add Filesort object to the given table to sort if with filesort

  @param idx        Position in the qep_tab array of the table to sort;
                    the created Filesort object is attached to qep_tab[idx]
  @param sort_order List of expressions to sort the table by

  @note This function moves tab->select, if any, to filesort->select

  @return false on success, true on OOM
*/
3959
bool
JOIN::add_sorting_to_table(uint idx, ORDER_with_src *sort_order)
{
  ASSERT_BEST_REF_IN_JOIN_ORDER(this);
  // Record in EXPLAIN output that this sort uses filesort.
  explain_flags.set(sort_order->src, ESP_USING_FILESORT);
  QEP_TAB *const tab= &qep_tab[idx];
  // No row-count limit here; the caller adjusts filesort->limit afterwards.
  tab->filesort=
    new (thd->mem_root) Filesort(tab, *sort_order, HA_POS_ERROR);
  if (!tab->filesort)
    return true;                              // OOM
  {
    // ref().key is an int; >= 0 means the table is accessed through a key.
    if (tab->ref().key >= 0)
    {
      TABLE *table= tab->table();
      if (tab->quick())
      {
        /*
          We can only use 'Only index' if quick key is same as ref_key
          and in index_merge 'Only index' cannot be used
        */
        if (((uint) tab->ref().key != tab->quick()->index))
          table->set_keyread(FALSE);
      }
      else
      {
        /*
          We have a ref on a const; Change this to a range that filesort
          can use.
          For impossible ranges (like when doing a lookup on NULL on a NOT
          NULL field), quick will contain an empty record set.

          @TODO This should be either indicated as range or filesort
          should start using ref directly, without switching to quick.
        */
        JOIN_TAB *const jtab= best_ref[idx];
        // Full-text access gets a special quick select; otherwise build a
        // range quick select from the ref.
        QUICK_SELECT_I *q= tab->type() == JT_FT ?
          get_ft_select(thd, table, tab->ref().key) :
          get_quick_select_for_ref(thd, table, &tab->ref(),
                                   jtab->found_records);
        if (!q)
          return true; /* purecov: inspected */
        tab->set_quick(q); // We keep it displayed as "ref".
      }
    }
  }
  // Rows will now be fetched through the (possibly new) quick/range access.
  tab->read_first_record= join_init_read_record;
  return false;
}
4008
4009 /**
4010 Find a cheaper access key than a given @a key
4011
4012 @param tab NULL or JOIN_TAB of the accessed table
4013 @param order Linked list of ORDER BY arguments
  @param table           The table to access; must equal tab->table() when
                         tab is non-NULL
4015 @param usable_keys Key map to find a cheaper key in
4016 @param ref_key
4017 * 0 <= key < MAX_KEY - key number (hint) to start the search
4018 * -1 - no key number provided
4019 @param select_limit LIMIT value, or HA_POS_ERROR if no limit
4020 @param [out] new_key Key number if success, otherwise undefined
4021 @param [out] new_key_direction Return -1 (reverse) or +1 if success,
4022 otherwise undefined
4023 @param [out] new_select_limit Return adjusted LIMIT
4024 @param [out] new_used_key_parts NULL by default, otherwise return number
4025 of new_key prefix columns if success
4026 or undefined if the function fails
4027 @param [out] saved_best_key_parts NULL by default, otherwise preserve the
4028 value for further use in QUICK_SELECT_DESC
4029
4030 @note
4031 This function takes into account table->quick_condition_rows statistic
4032 (that is calculated by JOIN::make_join_plan()).
4033 However, single table procedures such as mysql_update() and mysql_delete()
4034 never call JOIN::make_join_plan(), so they have to update it manually
4035 (@see get_index_for_order()).
4036 */
4037
bool
test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
                         key_map usable_keys, int ref_key,
                         ha_rows select_limit,
                         int *new_key, int *new_key_direction,
                         ha_rows *new_select_limit, uint *new_used_key_parts,
                         uint *saved_best_key_parts)
{
  DBUG_ENTER("test_if_cheaper_ordering");
  /*
    Check whether there is an index compatible with the given order
    usage of which is cheaper than usage of the ref_key index (ref_key>=0)
    or a table scan.
    It may be the case if ORDER/GROUP BY is used with LIMIT.
  */
  ha_rows best_select_limit= HA_POS_ERROR;
  JOIN *join= tab ? tab->join() : NULL;
  if (join)
    ASSERT_BEST_REF_IN_JOIN_ORDER(join);
  uint nr;
  key_map keys;
  uint best_key_parts= 0;
  int best_key_direction= 0;
  ha_rows best_records= 0;
  double read_time;
  int best_key= -1;
  bool is_best_covering= FALSE;
  // Estimated number of output rows produced per row of this table by
  // the tables joined after it; 1 when the table is last (or standalone).
  double fanout= 1;
  ha_rows table_records= table->file->stats.records;
  bool group= join && join->grouped && order == join->group_list;
  double refkey_rows_estimate= static_cast<double>(table->quick_condition_rows);
  const bool has_limit= (select_limit != HA_POS_ERROR);
  const join_type cur_access_method= tab ? tab->type() : JT_ALL;

  /*
    If not used with LIMIT, only use keys if the whole query can be
    resolved with a key; This is because filesort() is usually faster than
    retrieving all rows through an index.
  */
  if (select_limit >= table_records)
  {
    keys= *table->file->keys_to_use_for_scanning();
    keys.merge(table->covering_keys);

    /*
      We are adding here also the index specified in FORCE INDEX clause,
      if any.
      This is to allow users to use index in ORDER BY.
    */
    if (table->force_index)
      keys.merge(group ? table->keys_in_use_for_group_by :
                         table->keys_in_use_for_order_by);
    keys.intersect(usable_keys);
  }
  else
    keys= usable_keys;

  // Baseline cost to beat: current access path cost (plus the fanout of
  // the tables joined after 'tab'), or a full table scan when standalone.
  if (join)
  {
    read_time= tab->position()->read_cost;
    for (uint jt= tab->idx() + 1; jt < join->primary_tables; jt++)
    {
      POSITION *pos= join->best_ref[jt]->position();
      fanout*= pos->rows_fetched * pos->filter_effect;
    }
  }
  else
    read_time= table->file->table_scan_cost().total_cost();

  /*
    Calculate the selectivity of the ref_key for REF_ACCESS. For
    RANGE_ACCESS we use table->quick_condition_rows.
  */
  if (ref_key >= 0 && cur_access_method == JT_REF)
  {
    if (table->quick_keys.is_set(ref_key))
      refkey_rows_estimate= static_cast<double>(table->quick_rows[ref_key]);
    else
    {
      // Fall back to index statistics for the full ref prefix.
      const KEY *ref_keyinfo= table->key_info + ref_key;
      if (ref_keyinfo->has_records_per_key(tab->ref().key_parts - 1))
        refkey_rows_estimate=
          ref_keyinfo->records_per_key(tab->ref().key_parts - 1);
      else
        refkey_rows_estimate= 1.0;  // No index statistics
    }
    DBUG_ASSERT(refkey_rows_estimate >= 1.0);
  }

  // Examine every key of the table that can deliver rows in the required
  // order, and keep the cheapest candidate found.
  for (nr=0; nr < table->s->keys ; nr++)
  {
    int direction;
    uint used_key_parts;

    if (keys.is_set(nr) &&
        (direction= test_if_order_by_key(order, table, nr, &used_key_parts)))
    {
      /*
        At this point we are sure that ref_key is a non-ordering
        key (where "ordering key" is a key that will return rows
        in the order required by ORDER BY).
      */
      DBUG_ASSERT (ref_key != (int) nr);

      bool is_covering= table->covering_keys.is_set(nr) ||
                        (nr == table->s->primary_key &&
                         table->file->primary_key_is_clustered());

      /*
        Don't use an index scan with ORDER BY without limit.
        For GROUP BY without limit always use index scan
        if there is a suitable index.
        Why we hold to this asymmetry hardly can be explained
        rationally. It's easy to demonstrate that using
        temporary table + filesort could be cheaper for grouping
        queries too.
      */
      if (is_covering ||
          select_limit != HA_POS_ERROR ||
          (ref_key < 0 && (group || table->force_index)))
      {
        rec_per_key_t rec_per_key;
        KEY *keyinfo= table->key_info+nr;
        if (select_limit == HA_POS_ERROR)
          select_limit= table_records;
        if (group)
        {
          /*
            Used_key_parts can be larger than keyinfo->key_parts
            when using a secondary index clustered with a primary
            key (e.g. as in Innodb).
            See Bug #28591 for details.
          */
          rec_per_key= used_key_parts &&
                       used_key_parts <= actual_key_parts(keyinfo) ?
                       keyinfo->records_per_key(used_key_parts - 1) : 1.0f;
          set_if_bigger(rec_per_key, 1.0f);
          /*
            With a grouping query each group containing on average
            rec_per_key records produces only one row that will
            be included into the result set.
          */
          if (select_limit > table_records/rec_per_key)
            select_limit= table_records;
          else
            select_limit= (ha_rows) (select_limit*rec_per_key);
        }
        /*
          If tab=tk is not the last joined table tn then to get first
          L records from the result set we can expect to retrieve
          only L/fanout(tk,tn) where fanout(tk,tn) says how many
          rows in the record set on average will match each row tk.
          Usually our estimates for fanouts are too pessimistic.
          So the estimate for L/fanout(tk,tn) will be too optimistic
          and as result we'll choose an index scan when using ref/range
          access + filesort will be cheaper.
        */
        select_limit= (ha_rows) (select_limit < fanout ?
                                 1 : select_limit/fanout);
        /*
          We assume that each of the tested indexes is not correlated
          with ref_key. Thus, to select first N records we have to scan
          N/selectivity(ref_key) index entries.
          selectivity(ref_key) = #scanned_records/#table_records =
          refkey_rows_estimate/table_records.
          In any case we can't select more than #table_records.
          N/(refkey_rows_estimate/table_records) > table_records
          <=> N > refkey_rows_estimate.
        */
        if (select_limit > refkey_rows_estimate)
          select_limit= table_records;
        else
          select_limit= (ha_rows) (select_limit *
                                   (double) table_records /
                                   refkey_rows_estimate);
        rec_per_key=
          keyinfo->records_per_key(keyinfo->user_defined_key_parts - 1);
        set_if_bigger(rec_per_key, 1.0f);
        /*
          Here we take into account the fact that rows are
          accessed in sequences rec_per_key records in each.
          Rows in such a sequence are supposed to be ordered
          by rowid/primary key. When reading the data
          in a sequence we'll touch not more pages than the
          table file contains.
          TODO. Use the formula for a disk sweep sequential access
          to calculate the cost of accessing data rows for one
          index entry.
        */
        const Cost_estimate table_scan_time= table->file->table_scan_cost();
        const double index_scan_time= select_limit / rec_per_key *
          min<double>(table->cost_model()->page_read_cost(rec_per_key),
                      table_scan_time.total_cost());

        /*
          Switch to index that gives order if its scan time is smaller than
          read_time of current chosen access method. In addition, if the
          current chosen access method is index scan or table scan, always
          switch to the index that gives order when it is covering or when
          force index or group by is present.
        */
        if (((cur_access_method == JT_ALL || cur_access_method == JT_INDEX_SCAN)
             && (is_covering || group || table->force_index)) ||
            index_scan_time < read_time)
        {
          ha_rows quick_records= table_records;
          const ha_rows refkey_select_limit=
            (ref_key >= 0 && table->covering_keys.is_set(ref_key)) ?
            static_cast<ha_rows>(refkey_rows_estimate) :
            HA_POS_ERROR;

          // Never trade a covering candidate for a non-covering one, and
          // keep the current ref if it is covering and cheaper.
          if ((is_best_covering && !is_covering) ||
              (is_covering && refkey_select_limit < select_limit))
            continue;
          if (table->quick_keys.is_set(nr))
            quick_records= table->quick_rows[nr];
          if (best_key < 0 ||
              (select_limit <= min(quick_records,best_records) ?
               keyinfo->user_defined_key_parts < best_key_parts :
               quick_records < best_records))
          {
            // New best candidate: remember key, direction and limits.
            best_key= nr;
            best_key_parts= keyinfo->user_defined_key_parts;
            if (saved_best_key_parts)
              *saved_best_key_parts= used_key_parts;
            best_records= quick_records;
            is_best_covering= is_covering;
            best_key_direction= direction;
            best_select_limit= select_limit;
          }
        }
      }
    }
  }

  // No cheaper ordering index found (or it is the one already in use).
  if (best_key < 0 || best_key == ref_key)
    DBUG_RETURN(FALSE);

  *new_key= best_key;
  *new_key_direction= best_key_direction;
  *new_select_limit= has_limit ? best_select_limit : table_records;
  if (new_used_key_parts != NULL)
    *new_used_key_parts= best_key_parts;

  DBUG_RETURN(TRUE);
}
4284
4285
/**
  Find a key to apply single table UPDATE/DELETE by a given ORDER

  @param order           Linked list of ORDER BY arguments
  @param tab             QEP_TAB of the target table; its quick select,
                         if any, may be accessed or replaced
  @param limit           LIMIT clause parameter
  @param [out] need_sort TRUE if filesort needed
  @param [out] reverse
    TRUE if the key is reversed with respect to the given ORDER (undefined if
    key == MAX_KEY)

  @return
    - MAX_KEY if no key found                        (need_sort == TRUE)
    - MAX_KEY if quick select result order is OK     (need_sort == FALSE)
    - key number (either index scan or quick select) (need_sort == FALSE)

  @note
    Side effects:
    - may deallocate, or deallocate and replace, the table's quick select;
    - may set table->quick_condition_rows and table->quick_rows[...]
      to table->file->stats.records.
*/
4308
get_index_for_order(ORDER * order,QEP_TAB * tab,ha_rows limit,bool * need_sort,bool * reverse)4309 uint get_index_for_order(ORDER *order, QEP_TAB *tab,
4310 ha_rows limit, bool *need_sort, bool *reverse)
4311 {
4312 if (tab->quick() && tab->quick()->unique_key_range())
4313 { // Single row select (always "ordered"): Ok to use with key field UPDATE
4314 *need_sort= FALSE;
4315 /*
4316 Returning of MAX_KEY here prevents updating of used_key_is_modified
4317 in mysql_update(). Use quick select "as is".
4318 */
4319 return MAX_KEY;
4320 }
4321
4322 TABLE *const table= tab->table();
4323
4324 if (!order)
4325 {
4326 *need_sort= FALSE;
4327 if (tab->quick())
4328 return tab->quick()->index; // index or MAX_KEY, use quick select as is
4329 else
4330 return table->file->key_used_on_scan; // MAX_KEY or index for some engines
4331 }
4332
4333 if (!is_simple_order(order)) // just to cut further expensive checks
4334 {
4335 *need_sort= TRUE;
4336 return MAX_KEY;
4337 }
4338
4339 if (tab->quick())
4340 {
4341 if (tab->quick()->index == MAX_KEY)
4342 {
4343 *need_sort= TRUE;
4344 return MAX_KEY;
4345 }
4346
4347 uint used_key_parts;
4348 switch (test_if_order_by_key(order, table, tab->quick()->index,
4349 &used_key_parts)) {
4350 case 1: // desired order
4351 *need_sort= FALSE;
4352 return tab->quick()->index;
4353 case 0: // unacceptable order
4354 *need_sort= TRUE;
4355 return MAX_KEY;
4356 case -1: // desired order, but opposite direction
4357 {
4358 QUICK_SELECT_I *reverse_quick;
4359 if ((reverse_quick=
4360 tab->quick()->make_reverse(used_key_parts)))
4361 {
4362 delete tab->quick();
4363 tab->set_quick(reverse_quick);
4364 tab->set_type(calc_join_type(reverse_quick->get_type()));
4365 *need_sort= FALSE;
4366 return reverse_quick->index;
4367 }
4368 else
4369 {
4370 *need_sort= TRUE;
4371 return MAX_KEY;
4372 }
4373 }
4374 }
4375 DBUG_ASSERT(0);
4376 }
4377 else if (limit != HA_POS_ERROR)
4378 { // check if some index scan & LIMIT is more efficient than filesort
4379
4380 /*
4381 Update quick_condition_rows since single table UPDATE/DELETE procedures
4382 don't call JOIN::make_join_plan() and leave this variable uninitialized.
4383 */
4384 table->quick_condition_rows= table->file->stats.records;
4385
4386 int key, direction;
4387 if (test_if_cheaper_ordering(NULL, order, table,
4388 table->keys_in_use_for_order_by, -1,
4389 limit,
4390 &key, &direction, &limit))
4391 {
4392 *need_sort= FALSE;
4393 *reverse= (direction < 0);
4394 return key;
4395 }
4396 }
4397 *need_sort= TRUE;
4398 return MAX_KEY;
4399 }
4400
4401
4402 /**
4403 Returns number of key parts depending on
4404 OPTIMIZER_SWITCH_USE_INDEX_EXTENSIONS flag.
4405
4406 @param key_info pointer to KEY structure
4407
4408 @return number of key parts.
4409 */
4410
actual_key_parts(const KEY * key_info)4411 uint actual_key_parts(const KEY *key_info)
4412 {
4413 return key_info->table->in_use->
4414 optimizer_switch_flag(OPTIMIZER_SWITCH_USE_INDEX_EXTENSIONS) ?
4415 key_info->actual_key_parts : key_info->user_defined_key_parts;
4416 }
4417
4418
4419 /**
4420 Returns key flags depending on
4421 OPTIMIZER_SWITCH_USE_INDEX_EXTENSIONS flag.
4422
4423 @param key_info pointer to KEY structure
4424
4425 @return key flags.
4426 */
4427
actual_key_flags(KEY * key_info)4428 uint actual_key_flags(KEY *key_info)
4429 {
4430 return key_info->table->in_use->
4431 optimizer_switch_flag(OPTIMIZER_SWITCH_USE_INDEX_EXTENSIONS) ?
4432 key_info->actual_flags : key_info->flags;
4433 }
4434
4435
calc_join_type(int quick_type)4436 join_type calc_join_type(int quick_type)
4437 {
4438 if ((quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) ||
4439 (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) ||
4440 (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION))
4441 return JT_INDEX_MERGE;
4442 else
4443 return JT_RANGE;
4444 }
4445
4446
4447 /**
4448 @} (end of group Query_Optimizer)
4449 */
4450