1 /* Copyright (c) 2000, 2021, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23 /**
24 @file
25
26 @brief Optimize query expressions: Make optimal table join order, select
27 optimal access methods per table, apply grouping, sorting and
28 limit processing.
29
30 @defgroup Query_Optimizer Query Optimizer
31 @{
32 */
33
34 #include "sql_optimizer.h"
35
36 #include "my_bit.h" // my_count_bits
37 #include "abstract_query_plan.h" // Join_plan
38 #include "debug_sync.h" // DEBUG_SYNC
39 #include "item_sum.h" // Item_sum
40 #include "lock.h" // mysql_unlock_some_tables
41 #include "opt_explain.h" // join_type_str
42 #include "opt_trace.h" // Opt_trace_object
43 #include "sql_base.h" // init_ftfuncs
44 #include "sql_join_buffer.h" // JOIN_CACHE
45 #include "sql_parse.h" // check_stack_overrun
46 #include "sql_planner.h" // calculate_condition_filter
47 #include "sql_resolver.h" // subquery_allows_materialization
48 #include "sql_test.h" // print_where
49 #include "sql_tmp_table.h" // get_max_key_and_part_length
50 #include "opt_hints.h" // hint_table_state
51
52 #include <algorithm>
53 using std::max;
54 using std::min;
55
56 static bool optimize_semijoin_nests_for_materialization(JOIN *join);
57 static void calculate_materialization_costs(JOIN *join, TABLE_LIST *sj_nest,
58 uint n_tables,
59 Semijoin_mat_optimize *sjm);
60 static bool make_join_select(JOIN *join, Item *item);
61 static bool list_contains_unique_index(JOIN_TAB *tab,
62 bool (*find_func) (Field *, void *), void *data);
63 static bool find_field_in_item_list (Field *field, void *data);
64 static bool find_field_in_order_list (Field *field, void *data);
65 static ORDER *create_distinct_group(THD *thd, Ref_ptr_array ref_pointer_array,
66 ORDER *order, List<Item> &fields,
67 List<Item> &all_fields,
68 bool *all_order_by_fields_used);
69 static TABLE *get_sort_by_table(ORDER *a,ORDER *b,TABLE_LIST *tables);
70 static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab);
71 static Item *remove_additional_cond(Item* conds);
72 static void trace_table_dependencies(Opt_trace_context * trace,
73 JOIN_TAB *join_tabs,
74 uint table_count);
75 static bool
76 update_ref_and_keys(THD *thd, Key_use_array *keyuse,JOIN_TAB *join_tab,
77 uint tables, Item *cond, COND_EQUAL *cond_equal,
78 table_map normal_tables, SELECT_LEX *select_lex,
79 SARGABLE_PARAM **sargables);
80 static bool pull_out_semijoin_tables(JOIN *join);
81 static void add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab);
82 static ha_rows get_quick_record_count(THD *thd, JOIN_TAB *tab, ha_rows limit);
83 static Item *
84 make_cond_for_table_from_pred(Item *root_cond, Item *cond,
85 table_map tables, table_map used_table,
86 bool exclude_expensive_cond);
87 static bool
88 only_eq_ref_tables(JOIN *join, ORDER *order, table_map tables,
89 table_map *cached_eq_ref_tables, table_map
90 *eq_ref_tables);
91 static bool setup_join_buffering(JOIN_TAB *tab, JOIN *join, uint no_jbuf_after);
92
93 static bool
94 test_if_skip_sort_order(JOIN_TAB *tab, ORDER *order, ha_rows select_limit,
95 const bool no_changes, const key_map *map,
96 const char *clause_type);
97
98 static Item_func_match *test_if_ft_index_order(ORDER *order);
99
100
101 static uint32 get_key_length_tmp_table(Item *item);
102
103 /**
104 Optimizes one query block into a query execution plan (QEP.)
105
106 This is the entry point to the query optimization phase. This phase
107 applies both logical (equivalent) query rewrites, cost-based join
108 optimization, and rule-based access path selection. Once an optimal
109 plan is found, the member function creates/initializes all
110 structures needed for query execution. The main optimization phases
111 are outlined below:
112
113 -# Logical transformations:
114 - Outer to inner joins transformation.
115 - Equality/constant propagation.
116 - Partition pruning.
117 - COUNT(*), MIN(), MAX() constant substitution in case of
118 implicit grouping.
119 - ORDER BY optimization.
120 -# Perform cost-based optimization of table order and access path
121 selection. See JOIN::make_join_plan()
122 -# Post-join order optimization:
123 - Create optimal table conditions from the where clause and the
124 join conditions.
125 - Inject outer-join guarding conditions.
126 - Adjust data access methods after determining table condition
127 (several times.)
128 - Optimize ORDER BY/DISTINCT.
129 -# Code generation
130 - Set data access functions.
131 - Try to optimize away sorting/distinct.
132 - Setup temporary table usage for grouping and/or sorting.
133
134 @retval 0 Success.
135 @retval 1 Error, error code saved in member JOIN::error.
136 */
137 int
optimize()138 JOIN::optimize()
139 {
140 uint no_jbuf_after= UINT_MAX;
141
142 DBUG_ENTER("JOIN::optimize");
143 assert(select_lex->leaf_table_count == 0 ||
144 thd->lex->is_query_tables_locked() ||
145 select_lex == unit->fake_select_lex);
146 assert(tables == 0 &&
147 primary_tables == 0 &&
148 tables_list == (TABLE_LIST*)1);
149
150 // to prevent double initialization on EXPLAIN
151 if (optimized)
152 DBUG_RETURN(0);
153
154 Prepare_error_tracker tracker(thd);
155
156 DEBUG_SYNC(thd, "before_join_optimize");
157
158 THD_STAGE_INFO(thd, stage_optimizing);
159
160 if (select_lex->first_execution)
161 {
162 /**
163 @todo
164 This query block didn't transform itself in SELECT_LEX::prepare(), so
165 belongs to a parent query block. That parent, or its parents, had to
166 transform us - it has not; maybe it is itself in prepare() and
167 evaluating the present query block as an Item_subselect. Such evaluation
168 in prepare() is expected to be a rare case to be eliminated in the
169 future ("SET x=(subq)" is one such case; because it locks tables before
170 prepare()).
171 */
172 if (select_lex->apply_local_transforms(thd, false))
173 DBUG_RETURN(error= 1);
174 }
175
176 Opt_trace_context * const trace= &thd->opt_trace;
177 Opt_trace_object trace_wrapper(trace);
178 Opt_trace_object trace_optimize(trace, "join_optimization");
179 trace_optimize.add_select_number(select_lex->select_number);
180 Opt_trace_array trace_steps(trace, "steps");
181
182 count_field_types(select_lex, &tmp_table_param, all_fields, false, false);
183
184 assert(tmp_table_param.sum_func_count == 0 ||
185 group_list || implicit_grouping);
186
187 if (select_lex->olap == ROLLUP_TYPE && optimize_rollup())
188 DBUG_RETURN(true); /* purecov: inspected */
189
190 if (alloc_func_list())
191 DBUG_RETURN(1); /* purecov: inspected */
192
193 if (select_lex->get_optimizable_conditions(thd, &where_cond, &having_cond))
194 DBUG_RETURN(1);
195
196 set_optimized();
197
198 tables_list= select_lex->get_table_list();
199
200 /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
201 /*
202 Run optimize phase for all derived tables/views used in this SELECT,
203 including those in semi-joins.
204 */
205 if (select_lex->materialized_derived_table_count)
206 {
207 for (TABLE_LIST *tl= select_lex->leaf_tables; tl; tl= tl->next_leaf)
208 {
209 if (tl->is_view_or_derived() && tl->optimize_derived(thd))
210 DBUG_RETURN(1);
211 }
212 }
213
214 /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
215
216 row_limit= ((select_distinct || order || group_list) ?
217 HA_POS_ERROR : unit->select_limit_cnt);
218 // m_select_limit is used to decide if we are likely to scan the whole table.
219 m_select_limit= unit->select_limit_cnt;
220
221 if (unit->first_select()->active_options() & OPTION_FOUND_ROWS)
222 {
223 /*
224 Calculate found rows if
225 - LIMIT is set, and
226 - Query block is not equipped with "braces". In this case, each
227 query block must be calculated fully and the limit is applied on
228 the final UNION evaluation.
229 */
230 calc_found_rows= m_select_limit != HA_POS_ERROR && !select_lex->braces;
231 }
232 if (having_cond || calc_found_rows)
233 m_select_limit= HA_POS_ERROR;
234
235 if (unit->select_limit_cnt == 0 && !calc_found_rows)
236 {
237 zero_result_cause= "Zero limit";
238 best_rowcount= 0;
239 goto setup_subq_exit;
240 }
241
242 if (where_cond || select_lex->outer_join)
243 {
244 if (optimize_cond(thd, &where_cond, &cond_equal,
245 &select_lex->top_join_list, &select_lex->cond_value))
246 {
247 error= 1;
248 DBUG_PRINT("error",("Error from optimize_cond"));
249 DBUG_RETURN(1);
250 }
251 if (select_lex->cond_value == Item::COND_FALSE)
252 {
253 zero_result_cause= "Impossible WHERE";
254 best_rowcount= 0;
255 goto setup_subq_exit;
256 }
257 }
258 if (having_cond)
259 {
260 if (optimize_cond(thd, &having_cond, &cond_equal, NULL,
261 &select_lex->having_value))
262 {
263 error= 1;
264 DBUG_PRINT("error",("Error from optimize_cond"));
265 DBUG_RETURN(1);
266 }
267 if (select_lex->having_value == Item::COND_FALSE)
268 {
269 zero_result_cause= "Impossible HAVING";
270 best_rowcount= 0;
271 goto setup_subq_exit;
272 }
273 }
274
275 if (select_lex->partitioned_table_count && prune_table_partitions())
276 {
277 error= 1;
278 DBUG_PRINT("error", ("Error from prune_partitions"));
279 DBUG_RETURN(1);
280 }
281
282 /*
283 Try to optimize count(*), min() and max() to const fields if
284 there is implicit grouping (aggregate functions but no
285 group_list). In this case, the result set shall only contain one
286 row.
287 */
288 if (tables_list && implicit_grouping)
289 {
290 int res;
291 /*
292 opt_sum_query() returns HA_ERR_KEY_NOT_FOUND if no rows match
293 the WHERE condition,
294 or 1 if all items were resolved (optimized away),
295 or 0, or an error number HA_ERR_...
296
297 If all items were resolved by opt_sum_query, there is no need to
298 open any tables.
299 */
300 if ((res= opt_sum_query(thd, select_lex->leaf_tables, all_fields,
301 where_cond)))
302 {
303 best_rowcount= 0;
304 if (res == HA_ERR_KEY_NOT_FOUND)
305 {
306 DBUG_PRINT("info",("No matching min/max row"));
307 zero_result_cause= "No matching min/max row";
308 goto setup_subq_exit;
309 }
310 if (res > 1)
311 {
312 error= res;
313 DBUG_PRINT("error",("Error from opt_sum_query"));
314 DBUG_RETURN(1);
315 }
316 if (res < 0)
317 {
318 DBUG_PRINT("info",("No matching min/max row"));
319 zero_result_cause= "No matching min/max row";
320 goto setup_subq_exit;
321 }
322 DBUG_PRINT("info",("Select tables optimized away"));
323 zero_result_cause= "Select tables optimized away";
324 tables_list= 0; // All tables resolved
325 best_rowcount= 1;
326 const_tables= tables= primary_tables= select_lex->leaf_table_count;
327 /*
328 Extract all table-independent conditions and replace the WHERE
329 clause with them. All other conditions were computed by opt_sum_query
330 and the MIN/MAX/COUNT function(s) have been replaced by constants,
331 so there is no need to compute the whole WHERE clause again.
332 Notice that make_cond_for_table() will always succeed to remove all
333 computed conditions, because opt_sum_query() is applicable only to
334 conjunctions.
335 Preserve conditions for EXPLAIN.
336 */
337 if (where_cond && !thd->lex->describe)
338 {
339 Item *table_independent_conds=
340 make_cond_for_table(where_cond, PSEUDO_TABLE_BITS, 0, 0);
341 DBUG_EXECUTE("where",
342 print_where(table_independent_conds,
343 "where after opt_sum_query()",
344 QT_ORDINARY););
345 where_cond= table_independent_conds;
346 }
347 goto setup_subq_exit;
348 }
349 }
350 if (!tables_list)
351 {
352 DBUG_PRINT("info",("No tables"));
353 best_rowcount= 1;
354 error= 0;
355 if (make_tmp_tables_info())
356 DBUG_RETURN(1);
357 count_field_types(select_lex, &tmp_table_param, all_fields, false, false);
358 // Make plan visible for EXPLAIN
359 set_plan_state(NO_TABLES);
360 DBUG_RETURN(0);
361 }
362 error= -1; // Error is sent to client
363 sort_by_table= get_sort_by_table(order, group_list, select_lex->leaf_tables);
364
365 if ((where_cond || group_list || order) &&
366 substitute_gc(thd, select_lex, where_cond, group_list, order))
367 {
368 // We added hidden fields to the all_fields list, count them.
369 count_field_types(select_lex, &tmp_table_param, select_lex->all_fields,
370 false, false);
371 }
372
373 // Set up join order and initial access paths
374 THD_STAGE_INFO(thd, stage_statistics);
375 if (make_join_plan())
376 {
377 if (thd->killed)
378 thd->send_kill_message();
379 DBUG_PRINT("error",("Error: JOIN::make_join_plan() failed"));
380 DBUG_RETURN(1);
381 }
382
383 // At this stage, join_tab==NULL, JOIN_TABs are listed in order by best_ref.
384 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
385
386 if (zero_result_cause)
387 goto setup_subq_exit;
388
389 if (rollup.state != ROLLUP::STATE_NONE)
390 {
391 if (rollup_process_const_fields())
392 {
393 DBUG_PRINT("error", ("Error: rollup_process_fields() failed"));
394 DBUG_RETURN(1);
395 }
396 /*
397 Fields may have been replaced by Item_func_rollup_const, so
398 recalculate the number of fields and functions for this query block.
399 */
400
401 // JOIN::optimize_rollup() may set quick_group=0, and we must not undo that.
402 const uint save_quick_group= tmp_table_param.quick_group;
403
404 count_field_types(select_lex, &tmp_table_param, all_fields, false, false);
405 tmp_table_param.quick_group= save_quick_group;
406 }
407 else
408 {
409 /* Remove distinct if only const tables */
410 select_distinct&= !plan_is_const();
411 }
412
413 if (const_tables && !thd->locked_tables_mode &&
414 !(select_lex->active_options() & SELECT_NO_UNLOCK))
415 {
416 TABLE *ct[MAX_TABLES];
417 for (uint i= 0; i < const_tables; i++)
418 ct[i]= best_ref[i]->table();
419 mysql_unlock_some_tables(thd, ct, const_tables);
420 }
421 if (!where_cond && select_lex->outer_join)
422 {
423 /* Handle the case where we have an OUTER JOIN without a WHERE */
424 where_cond=new Item_int((longlong) 1,1); // Always true
425 }
426
427 error= 0;
428 /*
429 Among the equal fields belonging to the same multiple equality
430 choose the one that is to be retrieved first and substitute
431 all references to these in where condition for a reference for
432 the selected field.
433 */
434 if (where_cond)
435 {
436 where_cond= substitute_for_best_equal_field(where_cond, cond_equal,
437 map2table);
438 if (thd->is_error())
439 {
440 error= 1;
441 DBUG_PRINT("error",("Error from substitute_for_best_equal"));
442 DBUG_RETURN(1);
443 }
444 where_cond->update_used_tables();
445 DBUG_EXECUTE("where",
446 print_where(where_cond,
447 "after substitute_best_equal",
448 QT_ORDINARY););
449 }
450
451 /*
452 Perform the same optimization on field evaluation for all join conditions.
453 */
454 for (uint i= const_tables; i < tables ; ++i)
455 {
456 JOIN_TAB *const tab= best_ref[i];
457 if (tab->position() && tab->join_cond())
458 {
459 tab->set_join_cond(substitute_for_best_equal_field(tab->join_cond(),
460 tab->cond_equal,
461 map2table));
462 if (thd->is_error())
463 {
464 error= 1;
465 DBUG_PRINT("error",("Error from substitute_for_best_equal"));
466 DBUG_RETURN(1);
467 }
468 tab->join_cond()->update_used_tables();
469 }
470 }
471
472 if (init_ref_access())
473 {
474 error= 1;
475 DBUG_PRINT("error",("Error from init_ref_access"));
476 DBUG_RETURN(1);
477 }
478
479 // Update table dependencies after assigning ref access fields
480 update_depend_map();
481
482 THD_STAGE_INFO(thd, stage_preparing);
483
484 if (make_join_select(this, where_cond))
485 {
486 if (thd->is_error())
487 DBUG_RETURN(1);
488
489 zero_result_cause=
490 "Impossible WHERE noticed after reading const tables";
491 goto setup_subq_exit;
492 }
493
494 if (select_lex->query_result()->initialize_tables(this))
495 {
496 DBUG_PRINT("error",("Error: initialize_tables() failed"));
497 DBUG_RETURN(1); // error == -1
498 }
499
500 error= -1; /* if goto err */
501
502 if (optimize_distinct_group_order())
503 DBUG_RETURN(true);
504
505 if ((select_lex->active_options() & SELECT_NO_JOIN_CACHE) ||
506 select_lex->ftfunc_list->elements)
507 no_jbuf_after= 0;
508
509 /* Perform FULLTEXT search before all regular searches */
510 if (select_lex->has_ft_funcs() && optimize_fts_query())
511 DBUG_RETURN(1);
512
513 /*
514 By setting child_subquery_can_materialize so late we gain the following:
515 JOIN::compare_costs_of_subquery_strategies() can test this variable to
516 know if we are have finished evaluating constant conditions, which itself
517 helps determining fanouts.
518 */
519 child_subquery_can_materialize= true;
520
521 /*
522 It's necessary to check const part of HAVING cond as
523 there is a chance that some cond parts may become
524 const items after make_join_statisctics(for example
525 when Item is a reference to const table field from
526 outer join).
527 This check is performed only for those conditions
528 which do not use aggregate functions. In such case
529 temporary table may not be used and const condition
530 elements may be lost during further having
531 condition transformation in JOIN::exec.
532 */
533 if (having_cond && const_table_map && !having_cond->with_sum_func)
534 {
535 having_cond->update_used_tables();
536 if (remove_eq_conds(thd, having_cond, &having_cond,
537 &select_lex->having_value))
538 {
539 error= 1;
540 DBUG_PRINT("error",("Error from remove_eq_conds"));
541 DBUG_RETURN(1);
542 }
543 if (select_lex->having_value == Item::COND_FALSE)
544 {
545 having_cond= new Item_int((longlong) 0,1);
546 zero_result_cause= "Impossible HAVING noticed after reading const tables";
547 goto setup_subq_exit;
548 }
549 }
550
551 /* Cache constant expressions in WHERE, HAVING, ON clauses. */
552 if (!plan_is_const() && cache_const_exprs())
553 DBUG_RETURN(1);
554
555 // See if this subquery can be evaluated with subselect_indexsubquery_engine
556 if (const int ret= replace_index_subquery())
557 {
558 set_plan_state(PLAN_READY);
559 /*
560 We leave optimize() because the rest of it is only about order/group
561 which those subqueries don't have and about setting up plan which
562 we're not going to use due to different execution method.
563 */
564 DBUG_RETURN(ret < 0);
565 }
566
567 {
568 /*
569 If the hint FORCE INDEX FOR ORDER BY/GROUP BY is used for the first
570 table (it does not make sense for other tables) then we cannot do join
571 buffering.
572 */
573 if (!plan_is_const())
574 {
575 const TABLE * const first= best_ref[const_tables]->table();
576 if ((first->force_index_order && order) ||
577 (first->force_index_group && group_list))
578 no_jbuf_after= 0;
579 }
580
581 bool simple_sort= true;
582 // Check whether join cache could be used
583 for (uint i= const_tables; i < tables; i++)
584 {
585 JOIN_TAB *const tab= best_ref[i];
586 if (!tab->position())
587 continue;
588 if (setup_join_buffering(tab, this, no_jbuf_after))
589 DBUG_RETURN(true);
590 if (tab->use_join_cache() != JOIN_CACHE::ALG_NONE)
591 simple_sort= false;
592 assert(tab->type() != JT_FT ||
593 tab->use_join_cache() == JOIN_CACHE::ALG_NONE);
594 }
595 if (!simple_sort)
596 {
597 /*
598 A join buffer is used for this table. We here inform the optimizer
599 that it should not rely on rows of the first non-const table being in
600 order thanks to an index scan; indeed join buffering of the present
601 table subsequently changes the order of rows.
602 */
603 simple_order= simple_group= false;
604 }
605 }
606
607 if (!plan_is_const() && order)
608 {
609 /*
610 Force using of tmp table if sorting by a SP or UDF function due to
611 their expensive and probably non-deterministic nature.
612 */
613 for (ORDER *tmp_order= order; tmp_order ; tmp_order=tmp_order->next)
614 {
615 Item *item= *tmp_order->item;
616 if (item->is_expensive())
617 {
618 /* Force tmp table without sort */
619 simple_order= simple_group= false;
620 break;
621 }
622 }
623 }
624
625 /*
626 Check if we need to create a temporary table.
627 This has to be done if all tables are not already read (const tables)
628 and one of the following conditions holds:
629 - We are using DISTINCT (simple distinct's have already been optimized away)
630 - We are using an ORDER BY or GROUP BY on fields not in the first table
631 - We are using different ORDER BY and GROUP BY orders
632 - The user wants us to buffer the result.
633 When the WITH ROLLUP modifier is present, we cannot skip temporary table
634 creation for the DISTINCT clause just because there are only const tables.
635 */
636 need_tmp= ((!plan_is_const() &&
637 ((select_distinct || (order && !simple_order) ||
638 (group_list && !simple_group)) ||
639 (group_list && order) ||
640 (select_lex->active_options() & OPTION_BUFFER_RESULT))) ||
641 (rollup.state != ROLLUP::STATE_NONE && select_distinct));
642
643 DBUG_EXECUTE("info", TEST_join(this););
644
645 if (!plan_is_const())
646 {
647 JOIN_TAB *tab= best_ref[const_tables];
648 /*
649 Because filesort always does a full table scan or a quick range scan
650 we must add the removed reference to the select for the table.
651 We only need to do this when we have a simple_order or simple_group
652 as in other cases the join is done before the sort.
653 */
654 if ((order || group_list) &&
655 tab->type() != JT_ALL &&
656 tab->type() != JT_FT &&
657 tab->type() != JT_REF_OR_NULL &&
658 ((order && simple_order) || (group_list && simple_group)))
659 {
660 if (add_ref_to_table_cond(thd,tab)) {
661 DBUG_RETURN(1);
662 }
663 }
664 // Test if we can use an index instead of sorting
665 test_skip_sort();
666 }
667
668 if (alloc_qep(tables))
669 DBUG_RETURN(error= 1); /* purecov: inspected */
670
671 if (make_join_readinfo(this, no_jbuf_after))
672 DBUG_RETURN(1); /* purecov: inspected */
673
674 if (make_tmp_tables_info())
675 DBUG_RETURN(1);
676
677 // At this stage, we have fully set QEP_TABs; JOIN_TABs are unaccessible,
678 // pushed joins(see below) are still allowed to change the QEP_TABs
679
680 /*
681 Push joins to handlerton(s)
682
683 The handlerton(s) will inspect the QEP through the
684 AQP (Abstract Query Plan) and extract from it whatever
685 it might implement of pushed execution.
686
687 It is the responsibility of the handler:
688 - to store any information it need for later
689 execution of pushed queries.
690 - to call appropriate AQP functions which modifies the
691 QEP to use the special 'linked' read functions
692 for those parts of the join which have been pushed.
693
694 Currently pushed joins are only implemented by NDB.
695
696 It only make sense to try pushing if > 1 non-const tables.
697 */
698 if (!plan_is_single_table() && !plan_is_const())
699 {
700 const AQP::Join_plan plan(this);
701 if (ha_make_pushed_joins(thd, &plan))
702 DBUG_RETURN(1);
703 }
704
705 // Update m_current_query_cost to reflect actual need of filesort.
706 if (sort_cost > 0.0 && !explain_flags.any(ESP_USING_FILESORT))
707 {
708 best_read-= sort_cost;
709 sort_cost= 0.0;
710 if (thd->lex->is_single_level_stmt())
711 thd->m_current_query_cost= best_read;
712 }
713
714 count_field_types(select_lex, &tmp_table_param, all_fields, false, false);
715 // Make plan visible for EXPLAIN
716 set_plan_state(PLAN_READY);
717
718 DEBUG_SYNC(thd, "after_join_optimize");
719
720 error= 0;
721 DBUG_RETURN(0);
722
723 setup_subq_exit:
724
725 assert(zero_result_cause != NULL);
726 /*
727 Even with zero matching rows, subqueries in the HAVING clause may
728 need to be evaluated if there are aggregate functions in the
729 query. If this JOIN is part of an outer query, subqueries in HAVING may
730 be evaluated several times in total; so subquery materialization makes
731 sense.
732 */
733 child_subquery_can_materialize= true;
734 trace_steps.end(); // because all steps are done
735 Opt_trace_object(trace, "empty_result")
736 .add_alnum("cause", zero_result_cause);
737
738 having_for_explain= having_cond;
739 error= 0;
740
741 if (!qep_tab && best_ref)
742 {
743 /*
744 After creation of JOIN_TABs in make_join_plan(), we have shortcut due to
745 some zero_result_cause. For simplification, if we have JOIN_TABs we
746 want QEP_TABs too.
747 */
748 if (alloc_qep(tables))
749 DBUG_RETURN(1); /* purecov: inspected */
750 unplug_join_tabs();
751 }
752
753 set_plan_state(ZERO_RESULT);
754 DBUG_RETURN(0);
755 }
756
757
758 /**
759 Substitute all expressions in the WHERE condition and ORDER/GROUP lists
760 that match generated columns (GC) expressions with GC fields, if any.
761
762 @details This function does 3 things:
763 1) Creates list of all GC fields that are a part of a key and the GC
764 expression is a function. All query tables are scanned. If there's no
765 such fields, function exits.
766 2) By means of Item::compile() WHERE clause is transformed.
767 @see Item_func::gc_subst_transformer() for details.
768 3) If there's ORDER/GROUP BY clauses, this function tries to substitute
769 expressions in these lists with GC too. It removes from the list of
770 indexed GC all elements which index blocked by hints. This is done to
771 reduce amount of further work. Next it goes through ORDER/GROUP BY list
772 and matches the expression in it against GC expressions in indexed GC
773 list. When a match is found, the expression is replaced with a new
774 Item_field for the matched GC field. Also, this new field is added to
775 the hidden part of all_fields list.
776
777 @param thd thread handle
778 @param select_lex the current select
779 @param where_cond the WHERE condition, possibly NULL
780 @param group_list the GROUP BY clause, possibly NULL
781 @param order the ORDER BY clause, possibly NULL
782
783 @return true if the GROUP BY clause or the ORDER BY clause was
784 changed, false otherwise
785 */
786
substitute_gc(THD * thd,SELECT_LEX * select_lex,Item * where_cond,ORDER * group_list,ORDER * order)787 bool substitute_gc(THD *thd, SELECT_LEX *select_lex, Item *where_cond,
788 ORDER *group_list, ORDER *order)
789 {
790 List<Field> indexed_gc;
791 Opt_trace_context * const trace= &thd->opt_trace;
792 Opt_trace_object trace_wrapper(trace);
793 Opt_trace_object subst_gc(trace, "substitute_generated_columns");
794
795 // Collect all GCs that are a part of a key
796 for (TABLE_LIST *tl= select_lex->leaf_tables;
797 tl;
798 tl= tl->next_leaf)
799 {
800 if (tl->table->s->keys == 0)
801 continue;
802 for (uint i= 0; i < tl->table->s->fields; i++)
803 {
804 Field *fld= tl->table->field[i];
805 if (fld->is_gcol() && !fld->part_of_key.is_clear_all() &&
806 fld->gcol_info->expr_item->can_be_substituted_for_gc())
807 {
808 // Don't check allowed keys here as conditions/group/order use
809 // different keymaps for that.
810 indexed_gc.push_back(fld);
811 }
812 }
813 }
814 // No GC in the tables used in the query
815 if (indexed_gc.elements == 0)
816 return false;
817
818 if (where_cond)
819 {
820 // Item_func::compile will dereference this pointer, provide valid value.
821 uchar i, *dummy= &i;
822 where_cond->compile(&Item::gc_subst_analyzer, &dummy,
823 &Item::gc_subst_transformer, (uchar*) &indexed_gc);
824 subst_gc.add("resulting_condition", where_cond);
825 }
826
827 if (!(group_list || order))
828 return false;
829 // Filter out GCs that do not have index usable for GROUP/ORDER
830 Field *gc;
831 List_iterator<Field> li(indexed_gc);
832
833 while ((gc= li++))
834 {
835 key_map tkm= gc->part_of_key;
836 tkm.intersect(group_list ? gc->table->keys_in_use_for_group_by :
837 gc->table->keys_in_use_for_order_by);
838 if (tkm.is_clear_all())
839 li.remove();
840 }
841 if (!indexed_gc.elements)
842 return false;
843
844 // Index could be used for ORDER only if there is no GROUP
845 ORDER *list= group_list ? group_list : order;
846 bool changed= false;
847 for (ORDER *ord= list; ord; ord= ord->next)
848 {
849 li.rewind();
850 if (!(*ord->item)->can_be_substituted_for_gc())
851 continue;
852 while ((gc= li++))
853 {
854 Item_func *tmp= pointer_cast<Item_func*>(*ord->item);
855 Item_field *field;
856 if ((field= get_gc_for_expr(&tmp, gc, gc->result_type())))
857 {
858
859 changed= true;
860 /* Add new field to field list. */
861 ord->item= select_lex->add_hidden_item(field);
862 break;
863 }
864 }
865 }
866 if (changed && trace->is_started())
867 {
868 String str;
869 st_select_lex::print_order(&str, list,
870 enum_query_type(QT_TO_SYSTEM_CHARSET |
871 QT_SHOW_SELECT_NUMBER |
872 QT_NO_DEFAULT_DB));
873 subst_gc.add_utf8(group_list ? "resulting_GROUP_BY" :
874 "resulting_ORDER_BY",
875 str.ptr(), str.length());
876 }
877 return changed;
878 }
879
880
881 /**
882 Sets the plan's state of the JOIN. This is always the final step of
883 optimization; starting from this call, we expose the plan to other
884 connections (via EXPLAIN CONNECTION) so the plan has to be final.
885 QEP_TAB's quick_optim, condition_optim and keyread_optim are set here.
886 */
set_plan_state(enum_plan_state plan_state_arg)887 void JOIN::set_plan_state(enum_plan_state plan_state_arg)
888 {
889 // A plan should not change to another plan:
890 assert(plan_state_arg == NO_PLAN || plan_state == NO_PLAN);
891 if (plan_state == NO_PLAN && plan_state_arg != NO_PLAN)
892 {
893 if (qep_tab != NULL)
894 {
895 /*
896 We want to cover primary tables, tmp tables (they may have a sort, so
897 their "quick" and "condition" may change when execution runs the
898 sort), and sj-mat inner tables. Note that make_tmp_tables_info() may
899 have added a sort to the first non-const primary table, so it's
900 important to do those assignments after make_tmp_tables_info().
901 */
902 for (uint i= const_tables; i < tables; ++i)
903 {
904 qep_tab[i].set_quick_optim();
905 qep_tab[i].set_condition_optim();
906 qep_tab[i].set_keyread_optim();
907 }
908 }
909 }
910
911 DEBUG_SYNC(thd, "before_set_plan");
912
913 // If SQLCOM_END, no thread is explaining our statement anymore.
914 const bool need_lock= thd->query_plan.get_command() != SQLCOM_END;
915
916 if (need_lock)
917 thd->lock_query_plan();
918 plan_state= plan_state_arg;
919 if (need_lock)
920 thd->unlock_query_plan();
921 }
922
923
alloc_qep(uint n)924 bool JOIN::alloc_qep(uint n)
925 {
926 // Just to be sure that type plan_idx is wide enough:
927 compile_time_assert(MAX_TABLES <= INT_MAX8);
928
929 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
930
931 qep_tab= new(thd->mem_root) QEP_TAB[n];
932 if (!qep_tab)
933 return true; /* purecov: inspected */
934 for (uint i= 0; i < n; ++i)
935 qep_tab[i].init(best_ref[i]);
936 return false;
937 }
938
939
init(JOIN_TAB * jt)940 void QEP_TAB::init(JOIN_TAB *jt)
941 {
942 jt->share_qs(this);
943 set_table(table()); // to update table()->reginfo.qep_tab
944 table_ref= jt->table_ref;
945 }
946
947
948 /// @returns semijoin strategy for this table.
get_sj_strategy() const949 uint QEP_TAB::get_sj_strategy() const
950 {
951 if (first_sj_inner() == NO_PLAN_IDX)
952 return SJ_OPT_NONE;
953 const uint s= join()->qep_tab[first_sj_inner()].position()->sj_strategy;
954 assert(s != SJ_OPT_NONE);
955 return s;
956 }
957
958 /**
959 Return the index used for a table in a QEP
960
961 The various access methods have different places where the index/key
962 number is stored, so this function is needed to return the correct value.
963
964 @returns index number, or MAX_KEY if not applicable.
965
966 JT_SYSTEM and JT_ALL does not use an index, and will always return MAX_KEY.
967
968 JT_INDEX_MERGE supports more than one index. Hence MAX_KEY is returned and
969 a further inspection is needed.
970 */
effective_index() const971 uint QEP_TAB::effective_index() const
972 {
973 switch (type())
974 {
975 case JT_SYSTEM:
976 assert(ref().key == -1);
977 return MAX_KEY;
978
979 case JT_CONST:
980 case JT_EQ_REF:
981 case JT_REF_OR_NULL:
982 case JT_REF:
983 assert(ref().key != -1);
984 return uint(ref().key);
985
986 case JT_INDEX_SCAN:
987 case JT_FT:
988 return index();
989
990 case JT_INDEX_MERGE:
991 assert(quick()->index == MAX_KEY);
992 return MAX_KEY;
993
994 case JT_RANGE:
995 return quick()->index;
996
997 case JT_ALL:
998 default:
999 // @todo Check why JT_UNKNOWN is a valid value here.
1000 assert(type() == JT_ALL || type() == JT_UNKNOWN);
1001 return MAX_KEY;
1002 }
1003 }
1004
get_sj_strategy() const1005 uint JOIN_TAB::get_sj_strategy() const
1006 {
1007 if (first_sj_inner() == NO_PLAN_IDX)
1008 return SJ_OPT_NONE;
1009 ASSERT_BEST_REF_IN_JOIN_ORDER(join());
1010 JOIN_TAB *tab= join()->best_ref[first_sj_inner()];
1011 uint s= tab->position()->sj_strategy;
1012 assert(s != SJ_OPT_NONE);
1013 return s;
1014 }
1015
1016
/**
  Check if an IN subquery can be executed as an index lookup and, if so,
  replace the subquery's engine with a subselect_indexsubquery_engine.

  The replacement is only considered for a non-grouped, non-UNION IN
  subquery with exactly one primary table and a WHERE condition, which is
  not executed through materialization. The access type of the first table
  in best_ref then decides the outcome:
    - JT_EQ_REF without HAVING       -> JT_UNIQUE_SUBQUERY
    - JT_REF without HAVING          -> JT_INDEX_SUBQUERY
    - JT_REF_OR_NULL with the HAVING condition named in_having_cond
                                     -> JT_INDEX_SUBQUERY
  In all three cases the first item of the ref must carry the name
  in_left_expr_name.

  On replacement the QEP_TAB array is allocated, the JOIN_TABs are
  unplugged, and where_cond is attached to the first QEP_TAB.

  @retval 0   the subquery was left unchanged
  @retval 1   the engine was replaced
  @retval -1  error (QEP_TAB allocation or change_engine() failed)
*/
int JOIN::replace_index_subquery()
{
  DBUG_ENTER("replace_index_subquery");
  ASSERT_BEST_REF_IN_JOIN_ORDER(this);

  // Bail out early for every query shape this rewrite does not handle.
  if (group_list ||
      !(unit->item && unit->item->substype() == Item_subselect::IN_SUBS) ||
      primary_tables != 1 || !where_cond ||
      unit->is_union())
    DBUG_RETURN(0);

  // Guaranteed by remove_redundant_subquery_clauses():
  assert(order == NULL && !select_distinct);

  subselect_engine *engine= NULL;
  Item_in_subselect * const in_subs=
    static_cast<Item_in_subselect *>(unit->item);
  // Stays JT_UNKNOWN unless one of the patterns below matches.
  enum join_type type= JT_UNKNOWN;

  JOIN_TAB *const first_join_tab= best_ref[0];

  if (in_subs->exec_method == Item_exists_subselect::EXEC_MATERIALIZATION)
  {
    // We cannot have two engines at the same time
  }
  else if (having_cond == NULL)
  {
    if (first_join_tab->type() == JT_EQ_REF &&
        first_join_tab->ref().items[0]->item_name.ptr() == in_left_expr_name)
    {
      type= JT_UNIQUE_SUBQUERY;
      /*
        This uses test_if_ref(), which needs access to JOIN_TAB::join_cond() so
        it must be done before we get rid of JOIN_TAB.
      */
      remove_subq_pushed_predicates();
    }
    else if (first_join_tab->type() == JT_REF &&
             first_join_tab->ref().items[0]->item_name.ptr() == in_left_expr_name)
    {
      type= JT_INDEX_SUBQUERY;
      remove_subq_pushed_predicates();
    }
  }
  else if (first_join_tab->type() == JT_REF_OR_NULL &&
           first_join_tab->ref().items[0]->item_name.ptr() == in_left_expr_name &&
           having_cond->item_name.ptr() == in_having_cond)
  {
    type= JT_INDEX_SUBQUERY;
    where_cond= remove_additional_cond(where_cond);
  }

  if (type == JT_UNKNOWN)
    DBUG_RETURN(0);

  // From here on the plan is changed: switch from JOIN_TAB to QEP_TAB.
  if (alloc_qep(tables))
    DBUG_RETURN(-1);                            /* purecov: inspected */
  unplug_join_tabs();

  error= 0;
  QEP_TAB *const first_qep_tab= &qep_tab[0];

  // Read from the index only, if the lookup key covers the needed columns.
  if (first_qep_tab->table()->covering_keys.is_set(first_qep_tab->ref().key))
  {
    assert(!first_qep_tab->table()->no_keyread);
    first_qep_tab->table()->set_keyread(true);
  }
  // execution uses where_cond:
  first_qep_tab->set_condition(where_cond);

  engine=
    new subselect_indexsubquery_engine(thd, first_qep_tab, unit->item,
                                       where_cond,
                                       having_cond,
                                       // check_null
                                       first_qep_tab->type() == JT_REF_OR_NULL,
                                       // unique
                                       type == JT_UNIQUE_SUBQUERY);
  /**
     @todo If having_cond!=NULL we pass unique=false. But for this query:
     (oe1, oe2) IN (SELECT primary_key, non_key_maybe_null_field FROM tbl)
     we could use "unique=true" for the first index component and let
     Item_is_not_null_test(non_key_maybe_null_field) handle the second.
  */

  // Must come after the check_null argument above, which reads the old type.
  first_qep_tab->set_type(type);

  if (!unit->item->change_engine(engine))
    DBUG_RETURN(1);
  else // error:
    DBUG_RETURN(-1);                            /* purecov: inspected */
}
1109
1110
/**
  Simplify the ORDER BY, GROUP BY and DISTINCT clauses of this query block.

  In order, this function:
    - removes constant items from ORDER BY,
    - removes GROUP BY and/or DISTINCT when the grouped/selected columns
      contain a unique index of the single non-const table,
    - rewrites DISTINCT to GROUP BY for single-table plans when that is
      expected to be cheaper,
    - removes constant items from GROUP BY,
    - drops ORDER BY when it is a sub-part of GROUP BY or when the result
      has a single row.

  Members updated include order, group_list, select_distinct, grouped,
  simple_order, simple_group, skip_sort_order, no_order,
  group_optimized_away, send_group_parts and sort_and_group.

  @returns false on success, true on error (in the remove_const() error
           paths 'error' is set to 1 as well)
*/
bool JOIN::optimize_distinct_group_order()
{
  DBUG_ENTER("optimize_distinct_group_order");
  ASSERT_BEST_REF_IN_JOIN_ORDER(this);

  /* Remove constant items from ORDER BY; this may remove it entirely */
  {
    ORDER *org_order= order;
    order= ORDER_with_src(remove_const(order, where_cond, 1, &simple_order,
                                       "ORDER BY"),
                          order.src);
    if (thd->is_error())
    {
      error= 1;
      DBUG_PRINT("error",("Error from remove_const"));
      DBUG_RETURN(true);
    }

    /*
      If we are using ORDER BY NULL or ORDER BY const_expression,
      return result in any order (even if we are using a GROUP BY)
    */
    if (!order && org_order)
      skip_sort_order= 1;
  }
  /*
     Check if we can optimize away GROUP BY/DISTINCT.
     We can do that if there are no aggregate functions, the
     fields in DISTINCT clause (if present) and/or columns in GROUP BY
     (if present) contain direct references to all key parts of
     an unique index (in whatever order) and if the key parts of the
     unique index cannot contain NULLs.
     Note that the unique keys for DISTINCT and GROUP BY need not
     be the same (as long as they are unique).

     The FROM clause must contain a single non-constant table.

     @todo Apart from the LIS test, every condition depends only on facts
     which can be known in SELECT_LEX::prepare(), possibly this block should
     move there.
  */

  JOIN_TAB *const tab= best_ref[const_tables];

  if (plan_is_single_table() &&
      (group_list || select_distinct) &&
      !tmp_table_param.sum_func_count &&
      (!tab->quick() ||
       tab->quick()->get_type() !=
       QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX))
  {
    if (group_list && rollup.state == ROLLUP::STATE_NONE &&
        list_contains_unique_index(tab,
                                   find_field_in_order_list,
                                   (void *) group_list))
    {
      /*
        We have found that grouping can be removed since groups correspond to
        only one row anyway, but we still have to guarantee correct result
        order. The line below effectively rewrites the query from GROUP BY
        <fields> to ORDER BY <fields>. There are three exceptions:
        - if skip_sort_order is set (see above), then we can simply skip
          GROUP BY;
        - if IN(subquery), likewise (see remove_redundant_subquery_clauses())
        - we can only rewrite ORDER BY if the ORDER BY fields are 'compatible'
          with the GROUP BY ones, i.e. either one is a prefix of another.
          We only check if the ORDER BY is a prefix of GROUP BY. In this case
          test_if_subpart() copies the ASC/DESC attributes from the original
          ORDER BY fields.
          If GROUP BY is a prefix of ORDER BY, then it is safe to leave
          'order' as is.
       */
      if (!order || test_if_subpart(group_list, order))
        order= (skip_sort_order ||
                (unit->item && unit->item->substype() ==
                 Item_subselect::IN_SUBS)) ? NULL : group_list;

      /*
        If we have an IGNORE INDEX FOR GROUP BY(fields) clause, this must be
        rewritten to IGNORE INDEX FOR ORDER BY(fields).

        NOTE(review): this addresses best_ref[0] whereas the checks above use
        'tab' (best_ref[const_tables]); confirm that this is intended when
        const_tables > 0.
      */
      best_ref[0]->table()->keys_in_use_for_order_by=
        best_ref[0]->table()->keys_in_use_for_group_by;
      group_list= 0;
      grouped= false;
    }
    if (select_distinct &&
        list_contains_unique_index(tab,
                                   find_field_in_item_list,
                                   (void *) &fields_list))
    {
      select_distinct= 0;
    }
  }
  if (!(group_list || tmp_table_param.sum_func_count) &&
      select_distinct &&
      plan_is_single_table() &&
      rollup.state == ROLLUP::STATE_NONE)
  {
    /*
      We are only using one table. In this case we change DISTINCT to a
      GROUP BY query if:
      - The GROUP BY can be done through indexes (no sort) and the ORDER
        BY only uses selected fields.
        (In this case we can later optimize away GROUP BY and ORDER BY)
      - We are scanning the whole table without LIMIT
        This can happen if:
        - We are using CALC_FOUND_ROWS
        - We are using an ORDER BY that can't be optimized away.

      We don't want to use this optimization when we are using LIMIT
      because in this case we can just create a temporary table that
      holds LIMIT rows and stop when this table is full.
    */
    if (order)
    {
      // Only a test (no_changes=true): can an index deliver the ORDER BY?
      skip_sort_order=
        test_if_skip_sort_order(tab, order, m_select_limit,
                                true,           // no_changes
                                &tab->table()->keys_in_use_for_order_by,
                                "ORDER BY");
      count_field_types(select_lex, &tmp_table_param, all_fields, false, false);
    }
    ORDER *o;
    bool all_order_fields_used;
    if ((o= create_distinct_group(thd, ref_ptrs,
                                  order, fields_list, all_fields,
                                  &all_order_fields_used)))
    {
      group_list= ORDER_with_src(o, ESC_DISTINCT);
      const bool skip_group=
        skip_sort_order &&
        test_if_skip_sort_order(tab, group_list, m_select_limit,
                                true,         // no_changes
                                &tab->table()->keys_in_use_for_group_by,
                                "GROUP BY");
      count_field_types(select_lex, &tmp_table_param, all_fields, false, false);
      if ((skip_group && all_order_fields_used) ||
          m_select_limit == HA_POS_ERROR ||
          (order && !skip_sort_order))
      {
        /*  Change DISTINCT to GROUP BY */
        select_distinct= 0;
        no_order= !order;
        if (all_order_fields_used)
        {
          if (order && skip_sort_order)
          {
            /*
              Force MySQL to read the table in sorted order to get result in
              ORDER BY order.
            */
            tmp_table_param.quick_group=0;
          }
          order=0;
        }
        grouped= true;                    // For end_write_group
      }
      else
        group_list= 0;                    // Keep DISTINCT, drop the new list
    }
    else if (thd->is_fatal_error)         // End of memory
      DBUG_RETURN(true);
  }
  simple_group= 0;
  /* Remove constant items from GROUP BY */
  {
    ORDER *old_group_list= group_list;
    group_list= ORDER_with_src(remove_const(group_list, where_cond,
                                            rollup.state == ROLLUP::STATE_NONE,
                                            &simple_group, "GROUP BY"),
                               group_list.src);

    if (thd->is_error())
    {
      error= 1;
      DBUG_PRINT("error",("Error from remove_const"));
      DBUG_RETURN(true);
    }
    // GROUP BY consisted of constants only: DISTINCT is not needed either.
    if (old_group_list && !group_list)
      select_distinct= 0;
  }
  if (!group_list && grouped)
  {
    order=0;                                    // The output has only one row
    simple_order=1;
    select_distinct= 0;                       // No need in distinct for 1 row
    group_optimized_away= 1;
  }

  calc_group_buffer(this, group_list);
  send_group_parts= tmp_table_param.group_parts; /* Save org parts */

  /*
    ORDER BY is dropped if it is a sub-part of GROUP BY, or if there are
    aggregate functions without GROUP BY.
  */
  if (test_if_subpart(group_list, order) ||
      (!group_list && tmp_table_param.sum_func_count))
  {
    order=0;
    if (is_indexed_agg_distinct(this, NULL))
      sort_and_group= 0;
  }

  DBUG_RETURN(false);
}
1313
1314
/**
  Check whether the ordering required by GROUP BY or ORDER BY can be
  delivered by an index on the first non-const table, so that sorting can
  be skipped.

  GROUP BY is considered first; ORDER BY is only considered when there is
  no GROUP BY. The test is delegated to test_if_skip_sort_order() with
  no_changes=false. On success ordered_index_usage is set to
  ordered_index_group_by or ordered_index_order_by respectively.

  For GROUP BY, need_tmp may be set and simple_order/simple_group cleared
  when no index is used for the grouping.

  Expects ordered_index_usage to be ordered_index_void on entry.
*/
void JOIN::test_skip_sort()
{
  ASSERT_BEST_REF_IN_JOIN_ORDER(this);
  JOIN_TAB *const tab= best_ref[const_tables];

  assert(ordered_index_usage == ordered_index_void);

  if (group_list)   // GROUP BY honoured first
                    // (DISTINCT was rewritten to GROUP BY if skippable)
  {
    /*
      When there is SQL_BIG_RESULT do not sort using index for GROUP BY,
      and thus force sorting on disk unless a group min-max optimization
      is going to be used as it is applied now only for one table queries
      with covering indexes.
      The same restriction is applied when with_json_agg is set.
    */
    if (!(select_lex->active_options() & SELECT_BIG_RESULT || with_json_agg) ||
        (tab->quick() &&
         tab->quick()->get_type() ==
         QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX))
    {
      if (simple_group &&              // GROUP BY is possibly skippable
          !select_distinct)            // .. if not preceded by a DISTINCT
      {
        /*
          Calculate a possible 'limit' of table rows for 'GROUP BY':
          A specified 'LIMIT' is relative to the final resultset.
          'need_tmp' implies that there will be more postprocessing
          so the specified 'limit' should not be enforced yet.
         */
        const ha_rows limit = need_tmp ? HA_POS_ERROR : m_select_limit;

        if (test_if_skip_sort_order(tab, group_list, limit, false,
                                    &tab->table()->keys_in_use_for_group_by,
                                    "GROUP BY"))
        {
          ordered_index_usage= ordered_index_group_by;
        }
      }

      /*
        If we are going to use semi-join LooseScan, it will depend
        on the selected index scan to be used.  If index is not used
        for the GROUP BY, we risk that sorting is put on the LooseScan
        table.  In order to avoid this, force use of temporary table.
        TODO: Explain the quick_group part of the test below.
       */
      if ((ordered_index_usage != ordered_index_group_by) &&
          (tmp_table_param.quick_group ||
           (tab->emb_sj_nest &&
            tab->position()->sj_strategy == SJ_OPT_LOOSE_SCAN)))
      {
        need_tmp= true;
        simple_order= simple_group= false; // Force tmp table without sort
      }
    }
  }
  else if (order &&                      // ORDER BY wo/ preceding GROUP BY
           (simple_order || skip_sort_order)) // which is possibly skippable
  {
    if (test_if_skip_sort_order(tab, order, m_select_limit, false,
                                &tab->table()->keys_in_use_for_order_by,
                                "ORDER BY"))
    {
      ordered_index_usage= ordered_index_order_by;
    }
  }
}
1383
1384
1385 /**
1386 Test if ORDER BY is a single MATCH function(ORDER BY MATCH)
1387 and sort order is descending.
1388
1389 @param order pointer to ORDER struct.
1390
1391 @retval
1392 Pointer to MATCH function if order is 'ORDER BY MATCH() DESC'
1393 @retval
1394 NULL otherwise
1395 */
1396
test_if_ft_index_order(ORDER * order)1397 static Item_func_match *test_if_ft_index_order(ORDER *order)
1398 {
1399 if (order && order->next == NULL &&
1400 order->direction == ORDER::ORDER_DESC &&
1401 (*order->item)->type() == Item::FUNC_ITEM &&
1402 ((Item_func*) (*order->item))->functype() == Item_func::FT_FUNC)
1403 return static_cast<Item_func_match*> (*order->item)->get_master();
1404
1405 return NULL;
1406 }
1407
1408 /**
1409 Test if this is a prefix index.
1410
1411 @param table table
1412 @param idx index to check
1413
1414 @return TRUE if this is a prefix index
1415 */
is_prefix_index(TABLE * table,uint idx)1416 bool is_prefix_index(TABLE* table, uint idx)
1417 {
1418 if (!table || !table->key_info)
1419 {
1420 return false;
1421 }
1422 KEY* key_info = table->key_info;
1423 uint key_parts = key_info[idx].user_defined_key_parts;
1424 KEY_PART_INFO* key_part = key_info[idx].key_part;
1425
1426 for (uint i = 0; i < key_parts; i++, key_part++)
1427 {
1428 if (key_part->field &&
1429 (key_part->length !=
1430 table->field[key_part->fieldnr - 1]->key_length() &&
1431 !(key_info->flags & (HA_FULLTEXT | HA_SPATIAL))))
1432 {
1433 return true;
1434 }
1435 }
1436 return false;
1437 }
1438
/**
  Test if one can use the key to resolve ordering.

  The ORDER list is walked in parallel with the key parts of index 'idx'.
  Key parts flagged in table->const_key_parts[idx] are skipped. When the
  end of the index is reached and the storage engine reports
  HA_PRIMARY_KEY_IN_READ_INDEX, the walk continues over the key parts of
  the primary key. All ORDER elements must require the same scan direction.

  @param order                 Sort order
  @param table                 Table to sort
  @param idx                   Index to check
  @param[out] used_key_parts   If not NULL, receives the number of key parts
                               used.

  @note
    used_key_parts is only written when the end of the function is reached;
    the early "return 0" exits leave it untouched. Its value is meaningful
    if the return value is != 0.
    Note that the value may actually be greater than the number of index
    key parts. This can happen for storage engines that have the primary
    key parts as a suffix for every secondary key.

  @retval
    1   key is ok.
  @retval
    0   Key can't be used
  @retval
    -1  Reverse key can be used
*/

int test_if_order_by_key(ORDER *order, TABLE *table, uint idx,
                         uint *used_key_parts)
{
  KEY_PART_INFO *key_part,*key_part_end;
  key_part=table->key_info[idx].key_part;
  key_part_end=key_part+table->key_info[idx].user_defined_key_parts;
  key_part_map const_key_parts=table->const_key_parts[idx];
  // 0: direction not yet known, 1: forward scan, -1: backward scan
  int reverse=0;
  uint key_parts;
  // Set once we have continued from the secondary key into the primary key
  my_bool on_pk_suffix= FALSE;
  DBUG_ENTER("test_if_order_by_key");

  for (; order ; order=order->next, const_key_parts>>=1)
  {

    /*
      Since only fields can be indexed, ORDER BY <something> that is
      not a field cannot be resolved by using an index.
    */
    Item *real_itm= (*order->item)->real_item();
    if (real_itm->type() != Item::FIELD_ITEM)
      DBUG_RETURN(0);

    Field *field= static_cast<Item_field*>(real_itm)->field;
    int flag;

    /*
      Skip key parts that are constants in the WHERE clause.
      These are already skipped in the ORDER BY by const_expression_in_where()
    */
    for (; const_key_parts & 1 && key_part < key_part_end ;
         const_key_parts>>= 1)
      key_part++;

    /* Avoid usage of prefix index for sorting a partition table */
    if (table->part_info && key_part != table->key_info[idx].key_part &&
        key_part != key_part_end && is_prefix_index(table, idx))
      DBUG_RETURN(0);

    if (key_part == key_part_end)
    {
      /*
        We are at the end of the key. Check if the engine has the primary
        key as a suffix to the secondary keys. If it has continue to check
        the primary key as a suffix.
      */
      if (!on_pk_suffix &&
          (table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX) &&
          table->s->primary_key != MAX_KEY &&
          table->s->primary_key != idx)
      {
        on_pk_suffix= TRUE;
        key_part= table->key_info[table->s->primary_key].key_part;
        key_part_end=key_part +
          table->key_info[table->s->primary_key].user_defined_key_parts;
        const_key_parts=table->const_key_parts[table->s->primary_key];

        // Skip the leading constant key parts of the primary key.
        for (; const_key_parts & 1 ; const_key_parts>>= 1)
          key_part++;
        /*
         The primary and secondary key parts were all const (i.e. there's
         one row).  The sorting doesn't matter.
        */
        if (key_part == key_part_end && reverse == 0)
        {
          key_parts= 0;
          reverse= 1;
          goto ok;
        }
      }
      else
        DBUG_RETURN(0);
    }

    // The ORDER BY column must be the next key part, and usable for sorting.
    if (key_part->field != field || !field->part_of_sortkey.is_set(idx))
      DBUG_RETURN(0);

    const ORDER::enum_order keypart_order=
      (key_part->key_part_flag & HA_REVERSE_SORT) ?
      ORDER::ORDER_DESC : ORDER::ORDER_ASC;
    /* set flag to 1 if we can use read-next on key, else to -1 */
    flag= (order->direction == keypart_order) ? 1 : -1;
    // A scan has one direction; mixing directions rules out the index.
    if (reverse && flag != reverse)
      DBUG_RETURN(0);
    reverse=flag;                               // Remember if reverse
    key_part++;
  }
  if (on_pk_suffix)
  {
    // All parts of the secondary key were used, plus some of the primary key.
    uint used_key_parts_secondary= table->key_info[idx].user_defined_key_parts;
    uint used_key_parts_pk=
      (uint) (key_part - table->key_info[table->s->primary_key].key_part);
    key_parts= used_key_parts_pk + used_key_parts_secondary;

    // A backward scan requires HA_READ_PREV support on both indexes.
    if (reverse == -1 &&
        (!(table->file->index_flags(idx, used_key_parts_secondary - 1, 1) &
           HA_READ_PREV) ||
         !(table->file->index_flags(table->s->primary_key,
                                    used_key_parts_pk - 1, 1) & HA_READ_PREV)))
      reverse= 0;                               // Index can't be used
  }
  else
  {
    key_parts= (uint) (key_part - table->key_info[idx].key_part);
    if (reverse == -1 &&
        !(table->file->index_flags(idx, key_parts-1, 1) & HA_READ_PREV))
      reverse= 0;                               // Index can't be used
  }
ok:
  if (used_key_parts != NULL)
    *used_key_parts= key_parts;
  DBUG_RETURN(reverse);
}
1576
1577
1578 /**
1579 Find shortest key suitable for full table scan.
1580
1581 @param table Table to scan
1582 @param usable_keys Allowed keys
1583
1584 @note
1585 As far as
1586 1) clustered primary key entry data set is a set of all record
1587 fields (key fields and not key fields) and
1588 2) secondary index entry data is a union of its key fields and
1589 primary key fields (at least InnoDB and its derivatives don't
1590 duplicate primary key fields there, even if the primary and
1591 the secondary keys have a common subset of key fields),
1592 then secondary index entry data is always a subset of primary key entry.
1593 Unfortunately, key_info[nr].key_length doesn't show the length
1594 of key/pointer pair but a sum of key field lengths only, thus
1595 we can't estimate index IO volume comparing only this key_length
1596 value of secondary keys and clustered PK.
1597 So, try secondary keys first, and choose PK only if there are no
1598 usable secondary covering keys or found best secondary key include
1599 all table fields (i.e. same as PK):
1600
1601 @return
1602 MAX_KEY no suitable key found
1603 key index otherwise
1604 */
1605
find_shortest_key(TABLE * table,const key_map * usable_keys)1606 uint find_shortest_key(TABLE *table, const key_map *usable_keys)
1607 {
1608 uint best= MAX_KEY;
1609 uint usable_clustered_pk= (table->file->primary_key_is_clustered() &&
1610 table->s->primary_key != MAX_KEY &&
1611 usable_keys->is_set(table->s->primary_key)) ?
1612 table->s->primary_key : MAX_KEY;
1613 if (!usable_keys->is_clear_all())
1614 {
1615 uint min_length= (uint) ~0;
1616 for (uint nr=0; nr < table->s->keys ; nr++)
1617 {
1618 if (nr == usable_clustered_pk)
1619 continue;
1620 if (usable_keys->is_set(nr))
1621 {
1622 /*
1623 Can not do full index scan on rtree index because it is not
1624 supported by Innodb, probably not supported by others either.
1625 */
1626 const KEY &key_ref= table->key_info[nr];
1627 if (key_ref.key_length < min_length &&
1628 !(key_ref.flags & HA_SPATIAL))
1629 {
1630 min_length=key_ref.key_length;
1631 best=nr;
1632 }
1633 }
1634 }
1635 }
1636 if (usable_clustered_pk != MAX_KEY)
1637 {
1638 /*
1639 If the primary key is clustered and found shorter key covers all table
1640 fields then primary key scan normally would be faster because amount of
1641 data to scan is the same but PK is clustered.
1642 It's safe to compare key parts with table fields since duplicate key
1643 parts aren't allowed.
1644 */
1645 if (best == MAX_KEY ||
1646 table->key_info[best].user_defined_key_parts >= table->s->fields)
1647 best= usable_clustered_pk;
1648 }
1649 return best;
1650 }
1651
1652 /**
1653 Test if a second key is the subkey of the first one.
1654
1655 @param key_part First key parts
1656 @param ref_key_part Second key parts
1657 @param ref_key_part_end Last+1 part of the second key
1658
1659 @note
1660 Second key MUST be shorter than the first one.
1661
1662 @retval
1663 1 is a subkey
1664 @retval
1665 0 no sub key
1666 */
1667
1668 inline bool
is_subkey(KEY_PART_INFO * key_part,KEY_PART_INFO * ref_key_part,KEY_PART_INFO * ref_key_part_end)1669 is_subkey(KEY_PART_INFO *key_part, KEY_PART_INFO *ref_key_part,
1670 KEY_PART_INFO *ref_key_part_end)
1671 {
1672 for (; ref_key_part < ref_key_part_end; key_part++, ref_key_part++)
1673 if (!key_part->field->eq(ref_key_part->field))
1674 return 0;
1675 return 1;
1676 }
1677
1678
1679 /**
1680 Test if REF_OR_NULL optimization will be used if the specified
1681 ref_key is used for REF-access to 'tab'
1682
1683 @retval
1684 true JT_REF_OR_NULL will be used
1685 @retval
1686 false no JT_REF_OR_NULL access
1687 */
1688
1689 static bool
is_ref_or_null_optimized(const JOIN_TAB * tab,uint ref_key)1690 is_ref_or_null_optimized(const JOIN_TAB *tab, uint ref_key)
1691 {
1692 if (tab->keyuse())
1693 {
1694 const Key_use *keyuse= tab->keyuse();
1695 while (keyuse->key != ref_key && keyuse->table_ref == tab->table_ref)
1696 keyuse++;
1697
1698 const table_map const_tables= tab->join()->const_table_map;
1699 while (keyuse->key == ref_key && keyuse->table_ref == tab->table_ref)
1700 {
1701 if (!(keyuse->used_tables & ~const_tables))
1702 {
1703 if (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)
1704 return true;
1705 }
1706 keyuse++;
1707 }
1708 }
1709 return false;
1710 }
1711
1712
1713 /**
1714 Test if we can use one of the 'usable_keys' instead of 'ref' key
1715 for sorting.
1716
1717 @param ref Number of key, used for WHERE clause
1718 @param usable_keys Keys for testing
1719
1720 @return
1721 - MAX_KEY If we can't use other key
1722 - the number of found key Otherwise
1723 */
1724
1725 static uint
test_if_subkey(ORDER * order,JOIN_TAB * tab,uint ref,uint ref_key_parts,const key_map * usable_keys)1726 test_if_subkey(ORDER *order, JOIN_TAB *tab, uint ref, uint ref_key_parts,
1727 const key_map *usable_keys)
1728 {
1729 uint nr;
1730 uint min_length= (uint) ~0;
1731 uint best= MAX_KEY;
1732 TABLE *table= tab->table();
1733 KEY_PART_INFO *ref_key_part= table->key_info[ref].key_part;
1734 KEY_PART_INFO *ref_key_part_end= ref_key_part + ref_key_parts;
1735
1736 for (nr= 0 ; nr < table->s->keys ; nr++)
1737 {
1738 if (usable_keys->is_set(nr) &&
1739 table->key_info[nr].key_length < min_length &&
1740 table->key_info[nr].user_defined_key_parts >= ref_key_parts &&
1741 is_subkey(table->key_info[nr].key_part, ref_key_part,
1742 ref_key_part_end) &&
1743 !is_ref_or_null_optimized(tab, nr) &&
1744 test_if_order_by_key(order, table, nr))
1745 {
1746 min_length= table->key_info[nr].key_length;
1747 best= nr;
1748 }
1749 }
1750 return best;
1751 }
1752
1753
1754 /**
1755 It is not obvious to see that test_if_skip_sort_order() never changes the
1756 plan if no_changes is true. So we double-check: creating an instance of this
1757 class saves some important access-path-related information of the current
1758 table; when the instance is destroyed, the latest access-path information is
1759 compared with saved data.
1760 */
1761
1762 class Plan_change_watchdog
1763 {
1764 #ifndef NDEBUG
1765 public:
1766 /**
1767 @param tab_arg table whose access path is being determined
1768 @param no_changes whether a change to the access path is allowed
1769 */
Plan_change_watchdog(const JOIN_TAB * tab_arg,const bool no_changes_arg)1770 Plan_change_watchdog(const JOIN_TAB *tab_arg, const bool no_changes_arg)
1771 {
1772 // Only to keep gcc 4.1.2-44 silent about uninitialized variables
1773 quick= NULL;
1774 quick_index= 0;
1775 if (no_changes_arg)
1776 {
1777 tab= tab_arg;
1778 type= tab->type();
1779 if ((quick= tab->quick()))
1780 quick_index= quick->index;
1781 use_quick= tab->use_quick;
1782 ref_key= tab->ref().key;
1783 ref_key_parts= tab->ref().key_parts;
1784 index= tab->index();
1785 }
1786 else
1787 {
1788 tab= NULL;
1789 // Only to keep gcc 4.1.2-44 silent about uninitialized variables
1790 type= JT_UNKNOWN;
1791 quick= NULL;
1792 ref_key= ref_key_parts= index= 0;
1793 use_quick= QS_NONE;
1794 }
1795 }
~Plan_change_watchdog()1796 ~Plan_change_watchdog()
1797 {
1798 if (tab == NULL)
1799 return;
1800 // changes are not allowed, we verify:
1801 assert(tab->type() == type);
1802 assert(tab->quick() == quick);
1803 assert((quick == NULL) || tab->quick()->index == quick_index);
1804 assert(tab->use_quick == use_quick);
1805 assert(tab->ref().key == ref_key);
1806 assert(tab->ref().key_parts == ref_key_parts);
1807 assert(tab->index() == index);
1808 }
1809 private:
1810 const JOIN_TAB *tab; ///< table, or NULL if changes are allowed
1811 enum join_type type; ///< copy of tab->type()
1812 // "Range / index merge" info
1813 const QUICK_SELECT_I *quick; ///< copy of tab->select->quick
1814 uint quick_index; ///< copy of tab->select->quick->index
1815 enum quick_type use_quick; ///< copy of tab->use_quick
1816 // "ref access" info
1817 int ref_key; ///< copy of tab->ref().key
1818 uint ref_key_parts;/// copy of tab->ref().key_parts
1819 // Other index-related info
1820 uint index; ///< copy of tab->index
1821 #else // in non-debug build, empty class
1822 public:
1823 Plan_change_watchdog(const JOIN_TAB *tab_arg, const bool no_changes_arg) {}
1824 #endif
1825 };
1826
1827
1828 /**
1829 Test if we can skip ordering by using an index.
1830
1831 If the current plan is to use an index that provides ordering, the
1832 plan will not be changed. Otherwise, if an index can be used, the
1833 JOIN_TAB / tab->select struct is changed to use the index.
1834
1835 The index must cover all fields in <order>, or it will not be considered.
1836
1837 @param tab NULL or JOIN_TAB of the accessed table
1838 @param order Linked list of ORDER BY arguments
1839 @param select_limit LIMIT value, or HA_POS_ERROR if no limit
1840 @param no_changes No changes will be made to the query plan.
1841 @param map key_map of applicable indexes.
1842 @param clause_type "ORDER BY" etc for printing in optimizer trace
1843
1844 @todo
1845 - sergeyp: Results of all index merge selects actually are ordered
1846 by clustered PK values.
1847
1848 @note
1849 This function may change tmp_table_param.precomputed_group_by. This
1850 affects how create_tmp_table() treats aggregation functions, so
1851 count_field_types() must be called again to make sure this is taken
1852 into consideration.
1853
1854 @retval
1855 0 We have to use filesort to do the sorting
1856 @retval
1857 1 We can use an index.
1858 */
1859
1860 static bool
test_if_skip_sort_order(JOIN_TAB * tab,ORDER * order,ha_rows select_limit,const bool no_changes,const key_map * map,const char * clause_type)1861 test_if_skip_sort_order(JOIN_TAB *tab, ORDER *order, ha_rows select_limit,
1862 const bool no_changes, const key_map *map,
1863 const char *clause_type)
1864 {
1865 int ref_key;
1866 uint ref_key_parts= 0;
1867 int order_direction= 0;
1868 uint used_key_parts;
1869 TABLE *const table= tab->table();
1870 JOIN *const join= tab->join();
1871 THD *const thd= join->thd;
1872 QUICK_SELECT_I *const save_quick= tab->quick();
1873 int best_key= -1;
1874 bool set_up_ref_access_to_key= false;
1875 bool can_skip_sorting= false; // used as return value
1876 int changed_key= -1;
1877 DBUG_ENTER("test_if_skip_sort_order");
1878
1879 /* Check that we are always called with first non-const table */
1880 assert((uint)tab->idx() == join->const_tables);
1881
1882 Plan_change_watchdog watchdog(tab, no_changes);
1883
1884 /* Sorting a single row can always be skipped */
1885 if (tab->type() == JT_EQ_REF ||
1886 tab->type() == JT_CONST ||
1887 tab->type() == JT_SYSTEM)
1888 {
1889 DBUG_RETURN(1);
1890 }
1891
1892 /*
1893 Check if FT index can be used to retrieve result in the required order.
1894 It is possible if ordering is on the first non-constant table.
1895 */
1896 if (join->order && join->simple_order)
1897 {
1898 /*
1899 Check if ORDER is DESC, ORDER BY is a single MATCH function.
1900 */
1901 Item_func_match *ft_func= test_if_ft_index_order(order);
1902 /*
1903 Two possible cases when we can skip sort order:
1904 1. FT_SORTED must be set(Natural mode, no ORDER BY).
1905 2. If FT_SORTED flag is not set then
1906 the engine should support deferred sorting. Deferred sorting means
      that sorting is postponed until the start of index reading (InnoDB).
1908 In this case we set FT_SORTED flag here to let the engine know that
1909 internal sorting is needed.
1910 */
1911 if (ft_func && ft_func->ft_handler && ft_func->ordered_result())
1912 {
1913 /*
1914 FT index scan is used, so the only additional requirement is
1915 that ORDER BY MATCH function is the same as the function that
1916 is used for FT index.
1917 */
1918 if (tab->type() == JT_FT &&
1919 ft_func->eq(tab->position()->key->val, true))
1920 {
1921 ft_func->set_hints(join, FT_SORTED, select_limit, false);
1922 DBUG_RETURN(true);
1923 }
1924 /*
1925 No index is used, it's possible to use FT index for ORDER BY if
1926 LIMIT is present and does not exceed count of the records in FT index
1927 and there is no WHERE condition since a condition may potentially
1928 require more rows to be fetch from FT index.
1929 */
1930 else if (!tab->condition() &&
1931 select_limit != HA_POS_ERROR &&
1932 select_limit <= ft_func->get_count())
1933 {
1934 /* test_if_ft_index_order() always returns master MATCH function. */
1935 assert(!ft_func->master);
1936 /* ref is not set since there is no WHERE condition */
1937 assert(tab->ref().key == -1);
1938
1939 /*Make EXPLAIN happy */
1940 tab->set_type(JT_FT);
1941 tab->ref().key= ft_func->key;
1942 tab->ref().key_parts= 0;
1943 tab->set_index(ft_func->key);
1944 tab->set_ft_func(ft_func);
1945
1946 /* Setup FT handler */
1947 ft_func->set_hints(join, FT_SORTED, select_limit, true);
1948 ft_func->join_key= true;
1949 table->file->ft_handler= ft_func->ft_handler;
1950 DBUG_RETURN(true);
1951 }
1952 }
1953 }
1954
1955 /*
1956 Keys disabled by ALTER TABLE ... DISABLE KEYS should have already
1957 been taken into account.
1958 */
1959 key_map usable_keys= *map;
1960
1961 for (ORDER *tmp_order=order; tmp_order ; tmp_order=tmp_order->next)
1962 {
1963 Item *item= (*tmp_order->item)->real_item();
1964 if (item->type() != Item::FIELD_ITEM)
1965 {
1966 usable_keys.clear_all();
1967 DBUG_RETURN(0);
1968 }
1969 usable_keys.intersect(((Item_field*) item)->field->part_of_sortkey);
1970 if (usable_keys.is_clear_all())
1971 DBUG_RETURN(0); // No usable keys
1972 }
1973 if (tab->type() == JT_REF_OR_NULL || tab->type() == JT_FT)
1974 DBUG_RETURN(0);
1975
1976 ref_key= -1;
1977 /* Test if constant range in WHERE */
1978 if (tab->type() == JT_REF)
1979 {
1980 assert(tab->ref().key >= 0 && tab->ref().key_parts);
1981 ref_key= tab->ref().key;
1982 ref_key_parts= tab->ref().key_parts;
1983 }
1984 else if (tab->type() == JT_RANGE || tab->type() == JT_INDEX_MERGE)
1985 {
1986 // Range found by opt_range
1987 int quick_type= tab->quick()->get_type();
1988 /*
1989 assume results are not ordered when index merge is used
1990 TODO: sergeyp: Results of all index merge selects actually are ordered
1991 by clustered PK values.
1992 */
1993
1994 if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE ||
1995 quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
1996 quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT)
1997 DBUG_RETURN(0);
1998 ref_key= tab->quick()->index;
1999 ref_key_parts= tab->quick()->used_key_parts;
2000 }
2001 else if (tab->type() == JT_INDEX_SCAN)
2002 {
2003 // The optimizer has decided to use an index scan.
2004 ref_key= tab->index();
2005 ref_key_parts= actual_key_parts(&table->key_info[tab->index()]);
2006 }
2007
2008 Opt_trace_context * const trace= &thd->opt_trace;
2009 Opt_trace_object trace_wrapper(trace);
2010 Opt_trace_object
2011 trace_skip_sort_order(trace, "reconsidering_access_paths_for_index_ordering");
2012 trace_skip_sort_order.add_alnum("clause", clause_type);
2013 Opt_trace_array trace_steps(trace, "steps");
2014
2015 if (ref_key >= 0)
2016 {
2017 /*
2018 We come here when ref/index scan/range scan access has been set
2019 up for this table. Do not change access method if ordering is
2020 provided already.
2021 */
2022 if (!usable_keys.is_set(ref_key))
2023 {
2024 /*
2025 We come here when ref_key is not among usable_keys, try to find a
2026 usable prefix key of that key.
2027 */
2028 uint new_ref_key;
2029 /*
2030 If using index only read, only consider other possible index only
2031 keys
2032 */
2033 if (table->covering_keys.is_set(ref_key))
2034 usable_keys.intersect(table->covering_keys);
2035
2036 if ((new_ref_key= test_if_subkey(order, tab, ref_key, ref_key_parts,
2037 &usable_keys)) < MAX_KEY)
2038 {
2039 /* Found key that can be used to retrieve data in sorted order */
2040 if (tab->ref().key >= 0)
2041 {
2042 /*
2043 We'll use ref access method on key new_ref_key. The actual change
2044 is done further down in this function where we update the plan.
2045 */
2046 set_up_ref_access_to_key= true;
2047 }
2048 else if (!no_changes)
2049 {
2050 /*
2051 The range optimizer constructed QUICK_RANGE for ref_key, and
2052 we want to use instead new_ref_key as the index. We can't
2053 just change the index of the quick select, because this may
            result in an inconsistent QUICK_SELECT object. Below we
            create a new QUICK_SELECT from scratch so that all its
            parameters are set correctly by the range optimizer.
2057
2058 Note that the range optimizer is NOT called if
2059 no_changes==true. This reason is that the range optimizer
2060 cannot find a QUICK that can return ordered result unless
2061 index access (ref or index scan) is also able to do so
2062 (which test_if_order_by_key () will tell).
2063 Admittedly, range access may be much more efficient than
2064 e.g. index scan, but the only thing that matters when
2065 no_change==true is the answer to the question: "Is it
2066 possible to avoid sorting if an index is used to access
2067 this table?". The answer does not depend on the outcome of
2068 the range optimizer.
2069 */
2070 key_map new_ref_key_map; // Force the creation of quick select
2071 new_ref_key_map.set_bit(new_ref_key); // only for new_ref_key.
2072
2073 Opt_trace_object trace_wrapper(trace);
2074 Opt_trace_object
2075 trace_recest(trace, "rows_estimation");
2076 trace_recest.add_utf8_table(tab->table_ref).
2077 add_utf8("index", table->key_info[new_ref_key].name);
2078 QUICK_SELECT_I *qck;
2079 const bool no_quick=
2080 test_quick_select(thd, new_ref_key_map,
2081 0, // empty table_map
2082 join->calc_found_rows ?
2083 HA_POS_ERROR :
2084 join->unit->select_limit_cnt,
2085 false, // don't force quick range
2086 order->direction, tab,
2087 // we are after make_join_select():
2088 tab->condition(), &tab->needed_reg, &qck,
2089 tab->table()->force_index) <= 0;
2090 assert(tab->quick() == save_quick);
2091 tab->set_quick(qck);
2092 if (no_quick)
2093 {
2094 can_skip_sorting= false;
2095 goto fix_ICP;
2096 }
2097 }
2098 ref_key= new_ref_key;
2099 changed_key= new_ref_key;
2100 }
2101 }
2102 /* Check if we get the rows in requested sorted order by using the key */
2103 if (usable_keys.is_set(ref_key) &&
2104 (order_direction= test_if_order_by_key(order,table,ref_key,
2105 &used_key_parts)))
2106 goto check_reverse_order;
2107 }
2108 {
2109 /*
2110 There is no ref/index scan/range scan access set up for this
2111 table, or it does not provide the requested ordering. Do a
2112 cost-based search on all keys.
2113 */
2114 uint best_key_parts= 0;
2115 uint saved_best_key_parts= 0;
2116 int best_key_direction= 0;
2117 ha_rows table_records= table->file->stats.records;
2118
2119 /*
2120 If an index scan that cannot provide ordering has been selected
2121 then do not use the index scan key as starting hint to
2122 test_if_cheaper_ordering()
2123 */
2124 const int ref_key_hint= (order_direction == 0 &&
2125 tab->type() == JT_INDEX_SCAN) ? -1 : ref_key;
2126
2127 /*
2128 Does the query have a "FORCE INDEX [FOR GROUP BY] (idx)" (if
2129 clause is group by) or a "FORCE INDEX [FOR ORDER BY] (idx)" (if
2130 clause is order by)?
2131 */
2132 const bool is_group_by= join && join->grouped && order == join->group_list;
2133 const bool is_force_index= table->force_index ||
2134 (is_group_by ? table->force_index_group : table->force_index_order);
2135
2136 /*
2137 Find an ordering index alternative over the chosen plan iff
2138 prefer_ordering_index switch is on. This switch is overridden only when
2139 force index for order/group is specified.
2140 */
2141 if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_PREFER_ORDERING_INDEX) ||
2142 is_force_index)
2143 test_if_cheaper_ordering(tab, order, table, usable_keys,
2144 ref_key_hint,
2145 select_limit,
2146 &best_key, &best_key_direction,
2147 &select_limit, &best_key_parts,
2148 &saved_best_key_parts);
2149
2150 if (best_key < 0)
2151 {
2152 // No usable key has been found
2153 can_skip_sorting= false;
2154 goto fix_ICP;
2155 }
2156
2157 /*
2158 filesort() and join cache are usually faster than reading in
2159 index order and not using join cache. Don't use index scan
2160 unless:
2161 - the user specified FORCE INDEX [FOR {GROUP|ORDER} BY] (have to assume
2162 the user knows what's best)
2163 - the chosen index is clustered primary key (table scan is not cheaper)
2164 */
2165 if (!is_force_index &&
2166 (select_limit >= table_records) &&
2167 (tab->type() == JT_ALL &&
2168 join->primary_tables > join->const_tables + 1) &&
2169 ((unsigned) best_key != table->s->primary_key ||
2170 !table->file->primary_key_is_clustered()))
2171 {
2172 can_skip_sorting= false;
2173 goto fix_ICP;
2174 }
2175
2176 if (table->quick_keys.is_set(best_key) &&
2177 !tab->quick_order_tested.is_set(best_key) &&
2178 best_key != ref_key)
2179 {
2180 tab->quick_order_tested.set_bit(best_key);
2181 Opt_trace_object trace_wrapper(trace);
2182 Opt_trace_object
2183 trace_recest(trace, "rows_estimation");
2184 trace_recest.add_utf8_table(tab->table_ref).
2185 add_utf8("index", table->key_info[best_key].name);
2186
2187 key_map keys_to_use; // Force the creation of quick select
2188 keys_to_use.set_bit(best_key); // only best_key.
2189 QUICK_SELECT_I *qck;
2190 test_quick_select(thd,
2191 keys_to_use,
2192 0, // empty table_map
2193 join->calc_found_rows ?
2194 HA_POS_ERROR :
2195 join->unit->select_limit_cnt,
2196 true, // force quick range
2197 order->direction, tab, tab->condition(),
2198 &tab->needed_reg, &qck, tab->table()->force_index);
2199 /*
2200 If tab->quick() pointed to another quick than save_quick, we would
2201 lose access to it and leak memory.
2202 */
2203 assert(tab->quick() == save_quick || tab->quick() == NULL);
2204 tab->set_quick(qck);
2205 }
2206 order_direction= best_key_direction;
2207 /*
2208 saved_best_key_parts is actual number of used keyparts found by the
2209 test_if_order_by_key function. It could differ from keyinfo->key_parts,
2210 thus we have to restore it in case of desc order as it affects
2211 QUICK_SELECT_DESC behaviour.
2212 */
2213 used_key_parts= (order_direction == -1) ?
2214 saved_best_key_parts : best_key_parts;
2215 changed_key= best_key;
2216 // We will use index scan or range scan:
2217 set_up_ref_access_to_key= false;
2218 }
2219
2220 check_reverse_order:
2221 assert(order_direction != 0);
2222
2223 if (order_direction == -1) // If ORDER BY ... DESC
2224 {
2225 if (tab->quick())
2226 {
2227 /*
2228 Don't reverse the sort order, if it's already done.
        (In some cases test_if_order_by_key() can be called multiple times.)
2230 */
2231 if (tab->quick()->reverse_sorted())
2232 {
2233 can_skip_sorting= true;
2234 goto fix_ICP;
2235 }
2236
2237 if (tab->quick()->reverse_sort_possible())
2238 can_skip_sorting= true;
2239 else
2240 {
2241 can_skip_sorting= false;
2242 goto fix_ICP;
2243 }
2244 }
2245 else
2246 {
2247 // Other index access (ref or scan) poses no problem
2248 can_skip_sorting= true;
2249 }
2250 }
2251 else
2252 {
2253 // ORDER BY ASC poses no problem
2254 can_skip_sorting= true;
2255 }
2256
2257 assert(can_skip_sorting);
2258
2259 /*
2260 Update query plan with access pattern for doing
2261 ordered access according to what we have decided
2262 above.
2263 */
2264 if (!no_changes) // We are allowed to update QEP
2265 {
2266 if (set_up_ref_access_to_key)
2267 {
2268 /*
2269 We'll use ref access method on key changed_key. In general case
2270 the index search tuple for changed_ref_key will be different (e.g.
2271 when one index is defined as (part1, part2, ...) and another as
2272 (part1, part2(N), ...) and the WHERE clause contains
2273 "part1 = const1 AND part2=const2".
2274 So we build tab->ref() from scratch here.
2275 */
2276 Key_use *keyuse= tab->keyuse();
2277 while (keyuse->key != (uint)changed_key &&
2278 keyuse->table_ref == tab->table_ref)
2279 keyuse++;
2280
2281 if (create_ref_for_key(join, tab, keyuse, tab->prefix_tables()))
2282 {
2283 can_skip_sorting= false;
2284 goto fix_ICP;
2285 }
2286
2287 assert(tab->type() != JT_REF_OR_NULL && tab->type() != JT_FT);
2288
2289 // Changing the key makes filter_effect obsolete
2290 tab->position()->filter_effect= COND_FILTER_STALE;
2291 }
2292 else if (best_key >= 0)
2293 {
2294 /*
2295 If ref_key used index tree reading only ('Using index' in EXPLAIN),
2296 and best_key doesn't, then revert the decision.
2297 */
2298 if(!table->covering_keys.is_set(best_key))
2299 table->set_keyread(false);
2300 if (!tab->quick() || tab->quick() == save_quick) // created no QUICK
2301 {
2302 // Avoid memory leak:
2303 assert(tab->quick() == save_quick || tab->quick() == NULL);
2304 tab->set_quick(NULL);
2305 tab->set_index(best_key);
2306 tab->set_type(JT_INDEX_SCAN); // Read with index_first(), index_next()
2307 /*
2308 There is a bug. When we change here, e.g. from group_min_max to
2309 index scan: loose index scan expected to read a small number of rows
2310 (jumping through the index), this small number was in
2311 position()->rows_fetched; index scan will read much more, so
2312 rows_fetched should be updated. So should the filtering effect.
2313 It is visible in main.distinct in trunk:
2314 explain SELECT distinct a from t3 order by a desc limit 2;
2315 id select_type table partitions type possible_keys key key_len ref rows filtered Extra
2316 1 SIMPLE t3 NULL index a a 5 NULL 40 25.00 Using index
2317 "rows=40" should be ~200 i.e. # of records in table. Filter should be
2318 100.00 (no WHERE).
2319 */
2320 table->file->ha_index_or_rnd_end();
2321 if (thd->lex->is_explain())
2322 {
2323 /*
2324 @todo this neutralizes add_ref_to_table_cond(); as a result
2325 EXPLAIN shows no "using where" though real SELECT has one.
2326 */
2327 tab->ref().key= -1;
2328 tab->ref().key_parts= 0;
2329 }
2330 tab->position()->filter_effect= COND_FILTER_STALE;
2331 }
2332 else if (tab->type() != JT_ALL)
2333 {
2334 /*
2335 We're about to use a quick access to the table.
2336 We need to change the access method so as the quick access
2337 method is actually used.
2338 */
2339 assert(tab->quick());
2340 assert(tab->quick()->index==(uint)best_key);
2341 tab->set_type(calc_join_type(tab->quick()->get_type()));
2342 tab->use_quick=QS_RANGE;
2343 tab->ref().key= -1;
2344 tab->ref().key_parts=0; // Don't use ref key.
2345 if (tab->quick()->is_loose_index_scan())
2346 join->tmp_table_param.precomputed_group_by= TRUE;
2347 tab->position()->filter_effect= COND_FILTER_STALE;
2348 }
2349 } // best_key >= 0
2350
2351 if (order_direction == -1) // If ORDER BY ... DESC
2352 {
2353 if (tab->quick())
2354 {
2355 /* ORDER BY range_key DESC */
2356 QUICK_SELECT_I *tmp= tab->quick()->make_reverse(used_key_parts);
2357 if (!tmp)
2358 {
2359 /* purecov: begin inspected */
2360 can_skip_sorting= false; // Reverse sort failed -> filesort
2361 goto fix_ICP;
2362 /* purecov: end */
2363 }
2364 if (tab->quick() != tmp && tab->quick() != save_quick)
2365 delete tab->quick();
2366 tab->set_quick(tmp);
2367 tab->set_type(calc_join_type(tmp->get_type()));
2368 tab->position()->filter_effect= COND_FILTER_STALE;
2369 }
2370 else if (tab->type() == JT_REF &&
2371 tab->ref().key_parts <= used_key_parts)
2372 {
2373 /*
2374 SELECT * FROM t1 WHERE a=1 ORDER BY a DESC,b DESC
2375
2376 Use a traversal function that starts by reading the last row
2377 with key part (A) and then traverse the index backwards.
2378 */
2379 tab->reversed_access= true;
2380
2381 /*
2382 The current implementation of join_read_prev_same() does not
2383 work well in combination with ICP and can lead to increased
2384 execution time. Setting changed_key to the current key
2385 (based on that we change the access order for the key) will
2386 ensure that a pushed index condition will be cancelled.
2387 */
2388 changed_key= tab->ref().key;
2389 }
2390 else if (tab->type() == JT_INDEX_SCAN)
2391 tab->reversed_access= true;
2392 }
2393 else if (tab->quick())
2394 tab->quick()->need_sorted_output();
2395
2396 } // QEP has been modified
2397
2398 fix_ICP:
2399 /*
2400 Cleanup:
2401 We may have both a 'tab->quick()' and 'save_quick' (original)
2402 at this point. Delete the one that we won't use.
2403 */
2404 if (can_skip_sorting && !no_changes)
2405 {
2406 if (tab->type() == JT_INDEX_SCAN &&
2407 select_limit < table->file->stats.records)
2408 {
2409 tab->position()->rows_fetched= select_limit;
2410 tab->position()->filter_effect= COND_FILTER_STALE_NO_CONST;
2411 }
2412
2413 // Keep current (ordered) tab->quick()
2414 if (save_quick != tab->quick())
2415 delete save_quick;
2416 }
2417 else
2418 {
2419 // Restore original save_quick
2420 if (tab->quick() != save_quick)
2421 {
2422 delete tab->quick();
2423 tab->set_quick(save_quick);
2424 }
2425 }
2426
2427 trace_steps.end();
2428 Opt_trace_object
2429 trace_change_index(trace, "index_order_summary");
2430 trace_change_index.add_utf8_table(tab->table_ref)
2431 .add("index_provides_order", can_skip_sorting)
2432 .add_alnum("order_direction", order_direction == 1 ? "asc" :
2433 ((order_direction == -1) ? "desc" :
2434 "undefined"));
2435
2436 if (changed_key >= 0)
2437 {
2438 // switching to another index
2439 // Should be no pushed conditions at this point
2440 assert(!table->file->pushed_idx_cond);
2441 if (unlikely(trace->is_started()))
2442 {
2443 trace_change_index.add_utf8("index", table->key_info[changed_key].name);
2444 trace_change_index.add("plan_changed", !no_changes);
2445 if (!no_changes)
2446 trace_change_index.add_alnum("access_type", join_type_str[tab->type()]);
2447 }
2448 }
2449 else if (unlikely(trace->is_started()))
2450 {
2451 trace_change_index.add_utf8("index",
2452 ref_key >= 0 ?
2453 table->key_info[ref_key].name : "unknown");
2454 trace_change_index.add("plan_changed", false);
2455 }
2456 DBUG_RETURN(can_skip_sorting);
2457 }
2458
2459
2460 /**
2461 Prune partitions for all tables of a join (query block).
2462
2463 Requires that tables have been locked.
2464
2465 @returns false if success, true if error
2466 */
2467
prune_table_partitions()2468 bool JOIN::prune_table_partitions()
2469 {
2470 assert(select_lex->partitioned_table_count);
2471
2472 for (TABLE_LIST *tbl= select_lex->leaf_tables; tbl; tbl= tbl->next_leaf)
2473 {
2474 /*
2475 If tbl->embedding!=NULL that means that this table is in the inner
2476 part of the nested outer join, and we can't do partition pruning
2477 (TODO: check if this limitation can be lifted.
2478 This also excludes semi-joins. Is that intentional?)
2479 This will try to prune non-static conditions, which can
2480 be used after the tables are locked.
2481 */
2482 if (!tbl->embedding)
2483 {
2484 Item *prune_cond= tbl->join_cond_optim() ?
2485 tbl->join_cond_optim() : where_cond;
2486 if (prune_partitions(thd, tbl->table, prune_cond))
2487 return true;
2488 }
2489 }
2490
2491 return false;
2492 }
2493
2494
2495 /**
2496 A helper function to check whether it's better to use range than ref.
2497
2498 @details
2499 Heuristic: Switch from 'ref' to 'range' access if 'range'
2500 access can utilize more keyparts than 'ref' access. Conditions
2501 for doing switching:
2502
2503 1) Range access is possible Or tab->dodgy_ref_cost is set.
2504 2) This function is not relevant for FT, since there is no range access for
2505 that type of index.
2506 3) Used parts of key shouldn't have nullable parts & ref_or_null isn't used.
2507 4) 'ref' access depends on a constant, not a value read from a
2508 table earlier in the join sequence.
2509
2510 Rationale: if 'ref' depends on a value from another table,
2511 the join condition is not used to limit the rows read by
2512 'range' access (that would require dynamic range - 'Range
2513 checked for each record'). In other words, if 'ref' depends
2514 on a value from another table, we have a query with
2515 conditions of the form
2516
2517 this_table.idx_col1 = other_table.col AND <<- used by 'ref'
2518 this_table.idx_col1 OP <const> AND <<- used by 'range'
2519 this_table.idx_col2 OP <const> AND ... <<- used by 'range'
2520
2521 and an index on (idx_col1,idx_col2,...). But the fact that
2522 'range' access uses more keyparts does not mean that it is
2523 more selective than 'ref' access because these access types
2524 utilize different parts of the query condition. We
2525 therefore trust the cost based choice made by
2526 best_access_path() instead of forcing a heuristic choice
2527 here.
2528 5a) 'ref' access and 'range' access uses the same index.
2529 5b) 'range' access uses more keyparts than 'ref' access.
2530
2531 OR
2532
2533 6) Ref has borrowed the index estimate from range and created a cost
2534 estimate (See Optimize_table_order::find_best_ref). This will be a
       problem if range built its row estimate using a larger number of key
2536 parts than ref. In such a case, shift to range access over the same
2537 index. So run the range optimizer with that index as the only choice.
2538 (Condition 5 is not relevant here since it has been tested in
2539 find_best_ref.)
2540
2541 @param thd THD To re-run range optimizer.
2542 @param tab JOIN_TAB To check the above conditions.
2543
2544 @return true Range is better than ref
2545 @return false Ref is better or switch isn't possible
2546
2547 @todo: This decision should rather be made in best_access_path()
2548 */
2549
can_switch_from_ref_to_range(THD * thd,JOIN_TAB * tab)2550 static bool can_switch_from_ref_to_range(THD *thd, JOIN_TAB *tab)
2551 {
2552 if ((tab->quick() || tab->dodgy_ref_cost) && // 1)
2553 tab->position()->key->keypart != FT_KEYPART) // 2)
2554 {
2555 uint keyparts= 0, length= 0;
2556 table_map dep_map= 0;
2557 bool maybe_null= false;
2558
2559 calc_length_and_keyparts(tab->position()->key, tab,
2560 tab->position()->key->key,
2561 tab->prefix_tables(), NULL, &length, &keyparts,
2562 &dep_map, &maybe_null);
2563 if (maybe_null || // 3)
2564 dep_map) // 4)
2565 return false;
2566
2567 if (tab->quick() &&
2568 tab->position()->key->key == tab->quick()->index) // 5a)
2569 return length < tab->quick()->max_used_key_length; // 5b)
2570 else if (tab->dodgy_ref_cost) // 6)
2571 {
2572 key_map new_ref_key_map;
2573 new_ref_key_map.set_bit(tab->position()->key->key);
2574
2575 Opt_trace_context * const trace= &thd->opt_trace;
2576 Opt_trace_object trace_wrapper(trace);
2577
2578 Opt_trace_object
2579 can_switch(trace, "check_if_range_uses_more_keyparts_than_ref");
2580 Opt_trace_object
2581 trace_setup_cond(trace, "rerunning_range_optimizer_for_single_index");
2582
2583 QUICK_SELECT_I *qck;
2584 if (test_quick_select(thd, new_ref_key_map,
2585 0, // empty table_map
2586 tab->join()->row_limit,
2587 false, // don't force quick range
2588 ORDER::ORDER_NOT_RELEVANT,
2589 tab,
2590 tab->join_cond() ? tab->join_cond() :
2591 tab->join()->where_cond,
2592 &tab->needed_reg,
2593 &qck, true) > 0)
2594 {
2595 if (length < qck->max_used_key_length)
2596 {
2597 delete tab->quick();
2598 tab->set_quick(qck);
2599 return true;
2600 }
2601 else
2602 {
2603 Opt_trace_object (trace, "access_type_unchanged").
2604 add("ref_key_length", length).
2605 add("range_key_length", qck->max_used_key_length);
2606 delete qck;
2607 }
2608 }
2609 }
2610 }
2611 return false;
2612 }
2613
2614 /**
  A utility function - apply heuristics and optimize access methods to tables.
2616 Currently this function can change REF to RANGE and ALL to INDEX scan if
2617 latter is considered to be better (not cost-based) than the former.
2618 @note Side effect - this function could set 'Impossible WHERE' zero
2619 result.
2620 */
2621
adjust_access_methods()2622 void JOIN::adjust_access_methods()
2623 {
2624 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
2625 for (uint i= const_tables; i < tables; i++)
2626 {
2627 JOIN_TAB *const tab= best_ref[i];
2628 TABLE_LIST *const tl= tab->table_ref;
2629
2630 if (tab->type() == JT_ALL)
2631 {
2632 /*
2633 It's possible to speedup query by switching from full table scan to
2634 the scan of covering index, due to less data being read.
2635 Prerequisites for this are:
2636 1) Keyread (i.e index only scan) is allowed (table isn't updated/deleted
2637 from)
2638 2) Covering indexes are available
2639 3) This isn't a derived table/materialized view
2640 */
2641 if (!tab->table()->no_keyread && // 1
2642 !tab->table()->covering_keys.is_clear_all() && // 2
2643 !tl->uses_materialization()) // 3
2644 {
2645 /*
2646 It has turned out that the change commented out below, while speeding
2647 things up for disk-bound loads, slows them down for cases when the data
2648 is in disk cache (see BUG#35850):
2649 // See bug #26447: "Using the clustered index for a table scan
2650 // is always faster than using a secondary index".
2651 if (table->s->primary_key != MAX_KEY &&
2652 table->file->primary_key_is_clustered())
2653 tab->index= table->s->primary_key;
2654 else
2655 tab->index=find_shortest_key(table, & table->covering_keys);
2656 */
2657 if (tab->position()->sj_strategy != SJ_OPT_LOOSE_SCAN)
2658 tab->set_index(find_shortest_key(tab->table(), &tab->table()->covering_keys));
2659 tab->set_type(JT_INDEX_SCAN); // Read with index_first / index_next
2660 // From table scan to index scan, thus filter effect needs no recalc.
2661 }
2662 }
2663 else if (tab->type() == JT_REF)
2664 {
2665 if (can_switch_from_ref_to_range(thd, tab))
2666 {
2667 tab->set_type(JT_RANGE);
2668
2669 Opt_trace_context * const trace= &thd->opt_trace;
2670 Opt_trace_object wrapper(trace);
2671 Opt_trace_object (trace, "access_type_changed").
2672 add_utf8_table(tl).
2673 add_utf8("index",
2674 tab->table()->key_info[tab->position()->key->key].name).
2675 add_alnum("old_type", "ref").
2676 add_alnum("new_type", join_type_str[tab->type()]).
2677 add_alnum("cause", "uses_more_keyparts");
2678
2679 tab->use_quick= QS_RANGE;
2680 tab->position()->filter_effect= COND_FILTER_STALE;
2681 }
2682 else
2683 {
2684 // Cleanup quick, REF/REF_OR_NULL/EQ_REF, will be clarified later
2685 delete tab->quick();
2686 tab->set_quick(NULL);
2687 }
2688 }
2689 // Ensure AM consistency
2690 assert(!(tab->quick() && (tab->type() == JT_REF || tab->type() == JT_ALL)));
2691 assert((tab->type() != JT_RANGE && tab->type() != JT_INDEX_MERGE) ||
2692 tab->quick());
2693 if (!tab->const_keys.is_clear_all() &&
2694 tab->table()->reginfo.impossible_range &&
2695 ((i == const_tables && tab->type() == JT_REF) ||
2696 ((tab->type() == JT_ALL || tab->type() == JT_RANGE ||
2697 tab->type() == JT_INDEX_MERGE || tab->type() == JT_INDEX_SCAN) &&
2698 tab->use_quick != QS_RANGE)) &&
2699 !tab->table_ref->is_inner_table_of_outer_join())
2700 zero_result_cause=
2701 "Impossible WHERE noticed after reading const tables";
2702 }
2703 }
2704
2705
alloc_jtab_array(THD * thd,uint table_count)2706 static JOIN_TAB *alloc_jtab_array(THD *thd, uint table_count)
2707 {
2708 JOIN_TAB *t= new (thd->mem_root) JOIN_TAB[table_count];
2709 if (!t)
2710 return NULL; /* purecov: inspected */
2711
2712 QEP_shared *qs= new (thd->mem_root) QEP_shared[table_count];
2713 if (!qs)
2714 return NULL; /* purecov: inspected */
2715
2716 for (uint i= 0; i < table_count; ++i)
2717 t[i].set_qs(qs++);
2718
2719 return t;
2720 }
2721
2722
2723 /**
2724 Set up JOIN_TAB structs according to the picked join order in best_positions.
2725 This allocates execution structures so may be called only after we have the
2726 very final plan. It must be called after
2727 Optimize_table_order::fix_semijoin_strategies().
2728
2729 @return False if success, True if error
2730
2731 @details
2732 - create join->join_tab array and copy from existing JOIN_TABs in join order
2733 - create helper structs for materialized semi-join handling
2734 - finalize semi-join strategy choices
2735 - Number of intermediate tables "tmp_tables" is calculated.
2736 - "tables" and "primary_tables" are recalculated.
2737 - for full and index scans info of estimated # of records is updated.
2738 - in a helper function:
2739 - all heuristics are applied and the final access method type is picked
2740 for each join_tab (only test_if_skip_sortorder() could override it)
2741 - AM consistency is ensured (e.g only range and index merge are allowed
2742 to have quick select set).
2743 - if "Impossible WHERE" is detected - appropriate zero_result_cause is
2744 set.
2745
2746 Notice that intermediate tables will not have a POSITION reference; and they
2747 will not have a TABLE reference before the final stages of code generation.
2748
2749 @todo the block which sets tab->type should move to adjust_access_methods
2750 for unification.
2751 */
2752
get_best_combination()2753 bool JOIN::get_best_combination()
2754 {
2755 DBUG_ENTER("JOIN::get_best_combination");
2756
  // At this point "tables" and "primary_tables" represent the same:
2758 assert(tables == primary_tables);
2759
2760 /*
2761 Allocate additional space for tmp tables.
2762 Number of plan nodes:
2763 # of regular input tables (including semi-joined ones) +
2764 # of semi-join nests for materialization +
2765 1? + // For GROUP BY
2766 1? + // For DISTINCT
2767 1? + // For aggregation functions aggregated in outer query
2768 // when used with distinct
2769 1? + // For ORDER BY
2770 1? // buffer result
2771 Up to 2 tmp tables are actually used, but it's hard to tell exact number
2772 at this stage.
2773 */
2774 uint num_tmp_tables= (group_list ? 1 : 0) +
2775 (select_distinct ?
2776 (tmp_table_param.outer_sum_func_count ? 2 : 1) : 0) +
2777 (order ? 1 : 0) +
2778 (select_lex->active_options() &
2779 (SELECT_BIG_RESULT | OPTION_BUFFER_RESULT) ? 1 : 0);
2780 if (num_tmp_tables > 2)
2781 num_tmp_tables= 2;
2782
2783 /*
2784 Rearrange queries with materialized semi-join nests so that the semi-join
2785 nest is replaced with a reference to a materialized temporary table and all
2786 materialized subquery tables are placed after the intermediate tables.
2787 After the following loop, "inner_target" is the position of the first
2788 subquery table (if any). "outer_target" is the position of first outer
2789 table, and will later be used to track the position of any materialized
2790 temporary tables.
2791 */
2792 const bool has_semijoin= !select_lex->sj_nests.is_empty();
2793 uint outer_target= 0;
2794 uint inner_target= primary_tables + num_tmp_tables;
2795 uint sjm_nests= 0;
2796
2797 if (has_semijoin)
2798 {
2799 for (uint tableno= 0; tableno < primary_tables; )
2800 {
2801 if (sj_is_materialize_strategy(best_positions[tableno].sj_strategy))
2802 {
2803 sjm_nests++;
2804 inner_target-= (best_positions[tableno].n_sj_tables - 1);
2805 tableno+= best_positions[tableno].n_sj_tables;
2806 }
2807 else
2808 tableno++;
2809 }
2810 }
2811
2812 JOIN_TAB *tmp_join_tabs= NULL;
2813 if (sjm_nests + num_tmp_tables)
2814 {
2815 // join_tab array only has "primary_tables" tables. We need those more:
2816 if (!(tmp_join_tabs= alloc_jtab_array(thd, sjm_nests + num_tmp_tables)))
2817 DBUG_RETURN(true); /* purecov: inspected */
2818 }
2819
2820 // To check that we fill the array correctly: fill it with zeros first
2821 memset(best_ref, 0, sizeof(JOIN_TAB*) * (primary_tables + sjm_nests +
2822 num_tmp_tables));
2823
2824 int sjm_index= tables; // Number assigned to materialized temporary table
2825 int remaining_sjm_inner= 0;
2826 bool err= false;
2827 for (uint tableno= 0; tableno < tables; tableno++)
2828 {
2829 POSITION *const pos= best_positions + tableno;
2830 if (has_semijoin && sj_is_materialize_strategy(pos->sj_strategy))
2831 {
2832 assert(outer_target < inner_target);
2833
2834 TABLE_LIST *const sj_nest= pos->table->emb_sj_nest;
2835
2836 // Handle this many inner tables of materialized semi-join
2837 remaining_sjm_inner= pos->n_sj_tables;
2838
2839 /*
2840 If we fail in some allocation below, we cannot bail out immediately;
2841 that would put us in a difficult situation to clean up; imagine we
2842 have planned this layout:
2843 outer1 - sj_mat_tmp1 - outer2 - sj_mat_tmp2 - outer3
2844 We have successfully filled a JOIN_TAB for sj_mat_tmp1, and are
2845 failing to fill a JOIN_TAB for sj_mat_tmp2 (OOM). So we want to quit
2846 this function, which will lead to cleanup functions.
2847 But sj_mat_tmp1 is in this->best_ref only, outer3 is in this->join_tab
2848 only: what is the array to traverse for cleaning up? What is the
2849 number of tables to loop over?
2850 So: if we fail in the present loop, we record the error but continue
2851 filling best_ref; when it's fully filled, bail out, because then
2852 best_ref can be used as reliable array for cleaning up.
2853 */
2854 JOIN_TAB *const tab= tmp_join_tabs++;
2855 best_ref[outer_target]= tab;
2856 tab->set_join(this);
2857 tab->set_idx(outer_target);
2858
2859 /*
2860 Up to this point there cannot be a failure. JOIN_TAB has been filled
2861 enough to be clean-able.
2862 */
2863
2864 Semijoin_mat_exec *const sjm_exec=
2865 new (thd->mem_root)
2866 Semijoin_mat_exec(sj_nest,
2867 (pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN),
2868 remaining_sjm_inner, outer_target, inner_target);
2869
2870 tab->set_sj_mat_exec(sjm_exec);
2871
2872 if (!sjm_exec ||
2873 setup_semijoin_materialized_table(tab, sjm_index,
2874 pos, best_positions + sjm_index))
2875 err= true; /* purecov: inspected */
2876
2877 outer_target++;
2878 sjm_index++;
2879 }
2880 /*
2881 Locate join_tab target for the table we are considering.
2882 (remaining_sjm_inner becomes negative for non-SJM tables, this can be
2883 safely ignored).
2884 */
2885 const uint target=
2886 (remaining_sjm_inner--) > 0 ? inner_target++ : outer_target++;
2887 JOIN_TAB *const tab= pos->table;
2888
2889 best_ref[target]= tab;
2890 tab->set_idx(target);
2891 tab->set_position(pos);
2892 TABLE *const table= tab->table();
2893 if (tab->type() != JT_CONST && tab->type() != JT_SYSTEM)
2894 {
2895 if (pos->sj_strategy == SJ_OPT_LOOSE_SCAN && tab->quick() &&
2896 tab->quick()->index != pos->loosescan_key)
2897 {
2898 /*
2899 We must use the duplicate-eliminating index, so this QUICK is not
2900 an option.
2901 */
2902 delete tab->quick();
2903 tab->set_quick(NULL);
2904 }
2905 if (!pos->key)
2906 {
2907 if (tab->quick())
2908 tab->set_type(calc_join_type(tab->quick()->get_type()));
2909 else
2910 tab->set_type(JT_ALL);
2911 }
2912 else
2913 // REF or RANGE, clarify later when prefix tables are set for JOIN_TABs
2914 tab->set_type(JT_REF);
2915 }
2916 assert(tab->type() != JT_UNKNOWN);
2917
2918 assert(table->reginfo.join_tab == tab);
2919 if (!tab->join_cond())
2920 table->reginfo.not_exists_optimize= false; // Only with LEFT JOIN
2921 map2table[tab->table_ref->tableno()]= tab;
2922 }
2923
2924 // Count the materialized semi-join tables as regular input tables
2925 tables+= sjm_nests + num_tmp_tables;
2926 // Set the number of non-materialized tables:
2927 primary_tables= outer_target;
2928
2929 /*
2930 Between the last outer table or sj-mat tmp table, and the first sj-mat
2931 inner table, there may be 2 slots for sort/group/etc tmp tables:
2932 */
2933 for (uint i= 0; i < num_tmp_tables; ++i)
2934 {
2935 const uint idx= outer_target + i;
2936 tmp_join_tabs->set_join(this);
2937 tmp_join_tabs->set_idx(idx);
2938 assert(best_ref[idx] == NULL); // verify that not overwriting
2939 best_ref[idx]= tmp_join_tabs++;
2940 /*
2941 note that set_table() cannot be called yet. We may not even use this
2942 JOIN_TAB in the end, it's dummy at the moment. Which can be tested with
2943 "position()!=NULL".
2944 */
2945 }
2946
2947 // make array unreachable: should walk JOIN_TABs by best_ref now
2948 join_tab= NULL;
2949
2950 if (err)
2951 DBUG_RETURN(true); /* purecov: inspected */
2952
2953 if (has_semijoin)
2954 {
2955 set_semijoin_info();
2956
2957 // Update equalities and keyuses after having added SJ materialization
2958 if (update_equalities_for_sjm())
2959 DBUG_RETURN(true);
2960 }
2961 if (!plan_is_const())
2962 {
2963 // Assign map of "available" tables to all tables belonging to query block
2964 set_prefix_tables();
2965 adjust_access_methods();
2966 }
2967 // Calculate outer join info
2968 if (select_lex->outer_join)
2969 make_outerjoin_info();
2970
2971 // sjm is no longer needed, trash it. To reuse it, reset its members!
2972 List_iterator<TABLE_LIST> sj_list_it(select_lex->sj_nests);
2973 TABLE_LIST *sj_nest;
2974 while ((sj_nest= sj_list_it++))
2975 TRASH(&sj_nest->nested_join->sjm, sizeof(sj_nest->nested_join->sjm));
2976
2977 DBUG_RETURN(false);
2978 }
2979
2980
2981 /*
2982 Revise usage of join buffer for the specified table and the whole nest
2983
2984 SYNOPSIS
2985 revise_cache_usage()
2986 tab join table for which join buffer usage is to be revised
2987
2988 DESCRIPTION
    The function revises the decision to use a join buffer for the table 'tab'.
    If this table happens to be among the inner tables of a nested outer join/
    semi-join, the function denies usage of join buffers for all of them.
2992
2993 RETURN
2994 none
2995 */
2996
2997 static
revise_cache_usage(JOIN_TAB * join_tab)2998 void revise_cache_usage(JOIN_TAB *join_tab)
2999 {
3000 plan_idx first_inner= join_tab->first_inner();
3001 JOIN *const join= join_tab->join();
3002 if (first_inner != NO_PLAN_IDX)
3003 {
3004 plan_idx end_tab= join_tab->idx();
3005 for (first_inner= join_tab->first_inner();
3006 first_inner != NO_PLAN_IDX;
3007 first_inner= join->best_ref[first_inner]->first_upper())
3008 {
3009 for (plan_idx i= end_tab-1; i >= first_inner; --i)
3010 join->best_ref[i]->set_use_join_cache(JOIN_CACHE::ALG_NONE);
3011 end_tab= first_inner;
3012 }
3013 }
3014 else if (join_tab->get_sj_strategy() == SJ_OPT_FIRST_MATCH)
3015 {
3016 plan_idx first_sj_inner= join_tab->first_sj_inner();
3017 for (plan_idx i= join_tab->idx()-1; i >= first_sj_inner; --i)
3018 {
3019 JOIN_TAB *tab= join->best_ref[i];
3020 if (tab->first_sj_inner() == first_sj_inner)
3021 tab->set_use_join_cache(JOIN_CACHE::ALG_NONE);
3022 }
3023 }
3024 else
3025 join_tab->set_use_join_cache(JOIN_CACHE::ALG_NONE);
3026 assert(join->qep_tab == NULL);
3027 }
3028
3029
3030 /**
3031 Set up join buffering for a specified table, if possible.
3032
3033 @param tab joined table to check join buffer usage for
3034 @param join join for which the check is performed
3035 @param no_jbuf_after don't use join buffering after table with this number
3036
3037 @return false if successful, true if error.
3038 Currently, allocation errors for join cache objects are ignored,
3039 and regular execution is chosen silently.
3040
3041 @details
3042 The function finds out whether the table 'tab' can be joined using a join
3043 buffer. This check is performed after the best execution plan for 'join'
3044 has been chosen. If the function decides that a join buffer can be employed
3045 then it selects the most appropriate join cache type, which later will
3046 be instantiated by init_join_cache().
3047 If it has already been decided to not use join buffering for this table,
3048 no action is taken.
3049
3050 Often it is already decided that join buffering will be used earlier in
3051 the optimization process, and this will also ensure that the most correct
3052 cost for the operation is calculated, and hence the probability of
3053 choosing an optimal join plan is higher. However, some join buffering
3054 decisions cannot currently be taken before this stage, hence we need this
3055 function to decide the most accurate join buffering strategy.
3056
3057 @todo Long-term it is the goal that join buffering strategy is decided
3058 when the plan is selected.
3059
3060 The result of the check and the type of the join buffer to be used
3061 depend on:
3062 - the access method to access rows of the joined table
3063 - whether the join table is an inner table of an outer join or semi-join
3064 - the optimizer_switch settings for join buffering
3065 - the join 'options'.
3066 In any case join buffer is not used if the number of the joined table is
3067 greater than 'no_jbuf_after'.
3068
  If block_nested_loop is turned on, and if all other criteria for using
  join buffering are fulfilled (see below), then a join buffer is used
  for any join operation (inner join, outer join, semi-join) whose access
  method is 'JT_ALL', 'JT_INDEX_SCAN', 'JT_RANGE' or 'JT_INDEX_MERGE'.
  In that case, a JOIN_CACHE_BNL type is always employed.
3073
  If an index is used to access rows of the joined table and batched_key_access
  is on, then a JOIN_CACHE_BKA type is employed. (Unless the debug flag
  test_bka_unique is set, in which case a JOIN_CACHE_BKA_UNIQUE type is
  employed instead.)
3078
3079 If the function decides that a join buffer can be used to join the table
3080 'tab' then it sets @c tab->use_join_cache to reflect the chosen algorithm.
3081
3082 @note
3083 For a nested outer join/semi-join, currently, we either use join buffers for
3084 all inner tables or for none of them.
3085
3086 @todo
3087 Support BKA inside SJ-Materialization nests. When doing this, we'll need
3088 to only store sj-inner tables in the join buffer.
3089 #if 0
3090 JOIN_TAB *first_tab= join->join_tab+join->const_tables;
3091 uint n_tables= i-join->const_tables;
3092 / *
3093 We normally put all preceding tables into the join buffer, except
3094 for the constant tables.
3095 If we're inside a semi-join materialization nest, e.g.
3096
3097 outer_tbl1 outer_tbl2 ( inner_tbl1, inner_tbl2 ) ...
3098 ^-- we're here
3099
3100 then we need to put into the join buffer only the tables from
3101 within the nest.
3102 * /
3103 if (i >= first_sjm_table && i < last_sjm_table)
3104 {
3105 n_tables= i - first_sjm_table; // will be >0 if we got here
3106 first_tab= join->join_tab + first_sjm_table;
3107 }
3108 #endif
3109
3110 */
3111
setup_join_buffering(JOIN_TAB * tab,JOIN * join,uint no_jbuf_after)3112 static bool setup_join_buffering(JOIN_TAB *tab, JOIN *join, uint no_jbuf_after)
3113 {
3114 ASSERT_BEST_REF_IN_JOIN_ORDER(join);
3115 Cost_estimate cost;
3116 ha_rows rows;
3117 uint bufsz= 4096;
3118 uint join_cache_flags = 0;
3119 const bool bnl_on= hint_table_state(join->thd, tab->table_ref->table,
3120 BNL_HINT_ENUM, OPTIMIZER_SWITCH_BNL);
3121 const bool bka_on= hint_table_state(join->thd, tab->table_ref->table,
3122 BKA_HINT_ENUM, OPTIMIZER_SWITCH_BKA);
3123
3124 const uint tableno= tab->idx();
3125 const uint tab_sj_strategy= tab->get_sj_strategy();
3126 bool use_bka_unique= false;
3127 DBUG_EXECUTE_IF("test_bka_unique", use_bka_unique= true;);
3128
3129 /*
3130 If all key_parts are null_rejecting, the MultiRangeRowIterator will
3131 eliminate all NULL values in the key set, such that
3132 HA_MRR_NO_NULL_ENDPOINTS can be promised.
3133 */
3134 const key_part_map keypart_map = make_prev_keypart_map(tab->ref().key_parts);
3135 if (tab->ref().null_rejecting == keypart_map) {
3136 join_cache_flags |= HA_MRR_NO_NULL_ENDPOINTS;
3137 }
3138
3139 // Set preliminary join cache setting based on decision from greedy search
3140 tab->set_use_join_cache(tab->position()->use_join_buffer ?
3141 JOIN_CACHE::ALG_BNL : JOIN_CACHE::ALG_NONE);
3142
3143 if (tableno == join->const_tables)
3144 {
3145 assert(tab->use_join_cache() == JOIN_CACHE::ALG_NONE);
3146 return false;
3147 }
3148
3149 if (!(bnl_on || bka_on))
3150 goto no_join_cache;
3151
3152 /*
3153 psergey-todo: why the below when execution code seems to handle the
3154 "range checked for each record" case?
3155 */
3156 if (tab->use_quick == QS_DYNAMIC_RANGE)
3157 goto no_join_cache;
3158
3159 /* No join buffering if prevented by no_jbuf_after */
3160 if (tableno > no_jbuf_after)
3161 goto no_join_cache;
3162
3163 /*
3164 An inner table of an outer join nest must not use join buffering if
3165 the first inner table of that outer join nest does not use join buffering.
3166 This condition is not handled by earlier optimizer stages.
3167 */
3168 if (tab->first_inner() != NO_PLAN_IDX &&
3169 tab->first_inner() != tab->idx() &&
3170 !join->best_ref[tab->first_inner()]->use_join_cache())
3171 goto no_join_cache;
3172 /*
3173 The first inner table of an outer join nest must not use join buffering
3174 if the tables in the embedding outer join nest do not use join buffering.
3175 This condition is not handled by earlier optimizer stages.
3176 */
3177 if (tab->first_upper() != NO_PLAN_IDX &&
3178 !join->best_ref[tab->first_upper()]->use_join_cache())
3179 goto no_join_cache;
3180
3181 switch (tab_sj_strategy)
3182 {
3183 case SJ_OPT_FIRST_MATCH:
3184 /*
3185 Use join cache with FirstMatch semi-join strategy only when semi-join
3186 contains only one table.
3187 */
3188 if (!tab->is_single_inner_of_semi_join())
3189 {
3190 assert(tab->use_join_cache() == JOIN_CACHE::ALG_NONE);
3191 goto no_join_cache;
3192 }
3193 break;
3194
3195 case SJ_OPT_LOOSE_SCAN:
3196 /* No join buffering if this semijoin nest is handled by loosescan */
3197 assert(tab->use_join_cache() == JOIN_CACHE::ALG_NONE);
3198 goto no_join_cache;
3199
3200 case SJ_OPT_MATERIALIZE_LOOKUP:
3201 case SJ_OPT_MATERIALIZE_SCAN:
3202 /*
3203 The Materialize strategies reuse the join_tab belonging to the
3204 first table that was materialized. Neither table can use join buffering:
3205 - The first table in a join never uses join buffering.
3206 - The join_tab used for looking up a row in the materialized table, or
3207 scanning the rows of a materialized table, cannot use join buffering.
3208 We allow join buffering for the remaining tables of the materialized
3209 semi-join nest.
3210 */
3211 if (tab->first_sj_inner() == tab->idx())
3212 {
3213 assert(tab->use_join_cache() == JOIN_CACHE::ALG_NONE);
3214 goto no_join_cache;
3215 }
3216 break;
3217
3218 case SJ_OPT_DUPS_WEEDOUT:
3219 // This strategy allows the same join buffering as a regular join would.
3220 case SJ_OPT_NONE:
3221 break;
3222 }
3223
3224 /*
3225 The following code prevents use of join buffering when there is an
3226 outer join operation and first match semi-join strategy is used, because:
3227
3228 Outer join needs a "match flag" to track that a row should be
3229 NULL-complemented, such flag being attached to first inner table's cache
3230 (tracks whether the cached row from outer table got a match, in which case
3231 no NULL-complemented row is needed).
3232
3233 FirstMatch also needs a "match flag", such flag is attached to sj inner
3234 table's cache (tracks whether the cached row from outer table already got
3235 a first match in the sj-inner table, in which case we don't need to join
3236 this cached row again)
3237 - but a row in a cache has only one "match flag"
3238 - so if "sj inner table"=="first inner", there is a problem.
3239 */
3240 if (tab_sj_strategy == SJ_OPT_FIRST_MATCH &&
3241 tab->is_inner_table_of_outer_join())
3242 goto no_join_cache;
3243
3244 switch (tab->type()) {
3245 case JT_ALL:
3246 case JT_INDEX_SCAN:
3247 case JT_RANGE:
3248 case JT_INDEX_MERGE:
3249 if (!bnl_on)
3250 {
3251 assert(tab->use_join_cache() == JOIN_CACHE::ALG_NONE);
3252 goto no_join_cache;
3253 }
3254
3255 tab->set_use_join_cache(JOIN_CACHE::ALG_BNL);
3256 return false;
3257 case JT_SYSTEM:
3258 case JT_CONST:
3259 case JT_REF:
3260 case JT_EQ_REF:
3261 if (!bka_on)
3262 {
3263 assert(tab->use_join_cache() == JOIN_CACHE::ALG_NONE);
3264 goto no_join_cache;
3265 }
3266
3267 /*
3268 Disable BKA for materializable derived tables/views as they aren't
3269 instantiated yet.
3270 */
3271 if (tab->table_ref->uses_materialization())
3272 goto no_join_cache;
3273
3274 /*
3275 Can't use BKA for subquery if dealing with a subquery that can
3276 turn a ref access into a "full scan on NULL key" table scan.
3277
3278 @see Item_in_optimizer::val_int()
3279 @see subselect_single_select_engine::exec()
3280 @see TABLE_REF::cond_guards
3281 @see push_index_cond()
3282
3283 @todo: This choice to not use BKA should be done before making
3284 cost estimates, e.g. in set_join_buffer_properties(). That
3285 happens before cond guards are set up, so instead of doing the
3286 check below, BKA should be disabled if
3287 - We are in an IN subquery, and
3288 - The IN predicate is not a top_level_item, and
3289 - The left_expr of the IN predicate may contain NULL values
3290 (left_expr->maybe_null)
3291 */
3292 if (tab->has_guarded_conds())
3293 goto no_join_cache;
3294
3295 if (tab->table()->covering_keys.is_set(tab->ref().key))
3296 join_cache_flags|= HA_MRR_INDEX_ONLY;
3297 rows= tab->table()->file->multi_range_read_info(tab->ref().key, 10, 20,
3298 &bufsz,
3299 &join_cache_flags, &cost);
3300 /*
3301 Cannot use BKA/BKA_UNIQUE if
3302 1. MRR scan cannot be performed, or
3303 2. MRR default implementation is used
3304 Cannot use BKA if
3305 3. HA_MRR_NO_ASSOCIATION flag is set
3306 */
3307 if ((rows == HA_POS_ERROR) || // 1
3308 (join_cache_flags & HA_MRR_USE_DEFAULT_IMPL) || // 2
3309 ((join_cache_flags & HA_MRR_NO_ASSOCIATION) && // 3
3310 !use_bka_unique))
3311 goto no_join_cache;
3312
3313 if (use_bka_unique)
3314 tab->set_use_join_cache(JOIN_CACHE::ALG_BKA_UNIQUE);
3315 else
3316 tab->set_use_join_cache(JOIN_CACHE::ALG_BKA);
3317
3318 tab->join_cache_flags= join_cache_flags;
3319 return false;
3320 default : ;
3321 }
3322
3323 no_join_cache:
3324 revise_cache_usage(tab);
3325 tab->set_use_join_cache(JOIN_CACHE::ALG_NONE);
3326 return false;
3327 }
3328
3329
3330 /*****************************************************************************
3331 Make some simple condition optimization:
3332 If there is a test 'field = const' change all refs to 'field' to 'const'
3333 Remove all dummy tests 'item = item', 'const op const'.
3334 Remove all 'item is NULL', when item can never be null!
3335 item->marker should be 0 for all items on entry
3336 Return in cond_value FALSE if condition is impossible (1 = 2)
3337 *****************************************************************************/
3338
3339 class COND_CMP :public ilink<COND_CMP> {
3340 public:
operator new(size_t size)3341 static void *operator new(size_t size)
3342 {
3343 return sql_alloc(size);
3344 }
operator delete(void * ptr MY_ATTRIBUTE ((unused)),size_t size MY_ATTRIBUTE ((unused)))3345 static void operator delete(void *ptr MY_ATTRIBUTE((unused)),
3346 size_t size MY_ATTRIBUTE((unused)))
3347 { TRASH(ptr, size); }
3348
3349 Item *and_level;
3350 Item_func *cmp_func;
COND_CMP(Item * a,Item_func * b)3351 COND_CMP(Item *a,Item_func *b) :and_level(a),cmp_func(b) {}
3352 };
3353
3354
3355 /**
3356 Find the multiple equality predicate containing a field.
3357
3358 The function retrieves the multiple equalities accessed through
3359 the cond_equal structure from current level and up looking for
3360 an equality containing a field. It stops retrieval as soon as the equality
3361 is found and set up inherited_fl to TRUE if it's found on upper levels.
3362
3363 @param cond_equal multiple equalities to search in
3364 @param item_field field to look for
3365 @param[out] inherited_fl set up to TRUE if multiple equality is found
3366 on upper levels (not on current level of
3367 cond_equal)
3368
3369 @return
3370 - Item_equal for the found multiple equality predicate if a success;
3371 - NULL otherwise.
3372 */
3373
find_item_equal(COND_EQUAL * cond_equal,Item_field * item_field,bool * inherited_fl)3374 Item_equal *find_item_equal(COND_EQUAL *cond_equal, Item_field *item_field,
3375 bool *inherited_fl)
3376 {
3377 Item_equal *item= 0;
3378 bool in_upper_level= FALSE;
3379 while (cond_equal)
3380 {
3381 List_iterator_fast<Item_equal> li(cond_equal->current_level);
3382 while ((item= li++))
3383 {
3384 if (item->contains(item_field->field))
3385 goto finish;
3386 }
3387 in_upper_level= TRUE;
3388 cond_equal= cond_equal->upper_levels;
3389 }
3390 in_upper_level= FALSE;
3391 finish:
3392 *inherited_fl= in_upper_level;
3393 return item;
3394 }
3395
3396
3397 /**
3398 Get the best field substitution for a given field.
3399
3400 If the field is member of a multiple equality, look up that equality
3401 and return the most appropriate field. Usually this is the equivalenced
3402 field belonging to the outer-most table in the join order, but
3403 @see Item_field::get_subst_item() for details.
3404 Otherwise, return the same field.
3405
3406 @param item_field The field that we are seeking a substitution for.
3407 @param cond_equal multiple equalities to search in
3408
3409 @return The substituted field.
3410 */
3411
get_best_field(Item_field * item_field,COND_EQUAL * cond_equal)3412 Item_field *get_best_field(Item_field *item_field, COND_EQUAL *cond_equal)
3413 {
3414 bool dummy;
3415 Item_equal *item_eq= find_item_equal(cond_equal, item_field, &dummy);
3416 if (!item_eq)
3417 return item_field;
3418
3419 return item_eq->get_subst_item(item_field);
3420 }
3421
3422
3423 /**
3424 Check whether an equality can be used to build multiple equalities.
3425
3426 This function first checks whether the equality (left_item=right_item)
3427 is a simple equality i.e. one that equates a field with another field
3428 or a constant (field=field_item or field=const_item).
3429 If this is the case the function looks for a multiple equality
3430 in the lists referenced directly or indirectly by cond_equal inferring
3431 the given simple equality. If it doesn't find any, it builds a multiple
3432 equality that covers the predicate, i.e. the predicate can be inferred
3433 from this multiple equality.
3434 The built multiple equality could be obtained in such a way:
3435 create a binary multiple equality equivalent to the predicate, then
3436 merge it, if possible, with one of old multiple equalities.
3437 This guarantees that the set of multiple equalities covering equality
3438 predicates will be minimal.
3439
3440 EXAMPLE:
3441 For the where condition
3442 @code
3443 WHERE a=b AND b=c AND
3444 (b=2 OR f=e)
3445 @endcode
3446 the check_equality will be called for the following equality
3447 predicates a=b, b=c, b=2 and f=e.
3448 - For a=b it will be called with *cond_equal=(0,[]) and will transform
3449 *cond_equal into (0,[Item_equal(a,b)]).
3450 - For b=c it will be called with *cond_equal=(0,[Item_equal(a,b)])
3451 and will transform *cond_equal into CE=(0,[Item_equal(a,b,c)]).
3452 - For b=2 it will be called with *cond_equal=(ptr(CE),[])
3453 and will transform *cond_equal into (ptr(CE),[Item_equal(2,a,b,c)]).
3454 - For f=e it will be called with *cond_equal=(ptr(CE), [])
3455 and will transform *cond_equal into (ptr(CE),[Item_equal(f,e)]).
3456
3457 @note
3458 Now only fields that have the same type definitions (verified by
3459 the Field::eq_def method) are placed to the same multiple equalities.
3460 Because of this some equality predicates are not eliminated and
3461 can be used in the constant propagation procedure.
3462 We could weaken the equality test as soon as at least one of the
3463 equal fields is to be equal to a constant. It would require a
3464 more complicated implementation: we would have to store, in
3465 general case, its own constant for each fields from the multiple
3466 equality. But at the same time it would allow us to get rid
3467 of constant propagation completely: it would be done by the call
3468 to build_equal_items_for_cond.
3469
3470 The implementation does not follow exactly the above rules to
3471 build a new multiple equality for the equality predicate.
3472 If it processes the equality of the form field1=field2, it
3473 looks for multiple equalities me1 containing field1 and me2 containing
3474 field2. If only one of them is found the function expands it with
3475 the lacking field. If multiple equalities for both fields are
3476 found they are merged. If both searches fail a new multiple equality
3477 containing just field1 and field2 is added to the existing
3478 multiple equalities.
3479 If the function processes the predicate of the form field1=const,
3480 it looks for a multiple equality containing field1. If found, the
3481 function checks the constant of the multiple equality. If the value
3482 is unknown, it is setup to const. Otherwise the value is compared with
3483 const and the evaluation of the equality predicate is performed.
3484 When expanding/merging equality predicates from the upper levels
3485 the function first copies them for the current level. It looks
3486 acceptable, as this happens rarely. The implementation without
3487 copying would be much more complicated.
3488
3489 @param thd Thread handler
3490 @param left_item left term of the equality to be checked
3491 @param right_item right term of the equality to be checked
3492 @param item equality item if the equality originates from a condition
3493 predicate, 0 if the equality is the result of row
3494 elimination
3495 @param cond_equal multiple equalities that must hold together with the
3496 equality
3497 @param[out] simple_equality
3498 true if the predicate is a simple equality predicate
3499 to be used for building multiple equalities
3500 false otherwise
3501
3502 @returns false if success, true if error
3503 */
3504
check_simple_equality(THD * thd,Item * left_item,Item * right_item,Item * item,COND_EQUAL * cond_equal,bool * simple_equality)3505 static bool check_simple_equality(THD *thd,
3506 Item *left_item, Item *right_item,
3507 Item *item, COND_EQUAL *cond_equal,
3508 bool *simple_equality)
3509 {
3510 *simple_equality= false;
3511
3512 if (left_item->type() == Item::REF_ITEM &&
3513 down_cast<Item_ref *>(left_item)->ref_type() == Item_ref::VIEW_REF)
3514 {
3515 if (down_cast<Item_ref *>(left_item)->depended_from)
3516 return false;
3517 left_item= left_item->real_item();
3518 }
3519 if (right_item->type() == Item::REF_ITEM &&
3520 down_cast<Item_ref *>(right_item)->ref_type() == Item_ref::VIEW_REF)
3521 {
3522 if (down_cast<Item_ref *>(right_item)->depended_from)
3523 return false;
3524 right_item= right_item->real_item();
3525 }
3526 Item_field *left_item_field, *right_item_field;
3527
3528 if (left_item->type() == Item::FIELD_ITEM &&
3529 right_item->type() == Item::FIELD_ITEM &&
3530 (left_item_field= down_cast<Item_field *>(left_item)) &&
3531 (right_item_field= down_cast<Item_field *>(right_item)) &&
3532 !left_item_field->depended_from &&
3533 !right_item_field->depended_from)
3534 {
3535 /* The predicate the form field1=field2 is processed */
3536
3537 Field *const left_field= left_item_field->field;
3538 Field *const right_field= right_item_field->field;
3539
3540 if (!left_field->eq_def(right_field))
3541 return false;
3542
3543 /* Search for multiple equalities containing field1 and/or field2 */
3544 bool left_copyfl, right_copyfl;
3545 Item_equal *left_item_equal=
3546 find_item_equal(cond_equal, left_item_field, &left_copyfl);
3547 Item_equal *right_item_equal=
3548 find_item_equal(cond_equal, right_item_field, &right_copyfl);
3549
3550 /* As (NULL=NULL) != TRUE we can't just remove the predicate f=f */
3551 if (left_field->eq(right_field)) /* f = f */
3552 {
3553 *simple_equality= !(left_field->maybe_null() && !left_item_equal);
3554 return false;
3555 }
3556
3557 if (left_item_equal && left_item_equal == right_item_equal)
3558 {
3559 /*
3560 The equality predicate is inference of one of the existing
3561 multiple equalities, i.e the condition is already covered
3562 by upper level equalities
3563 */
3564 *simple_equality= true;
3565 return false;
3566 }
3567
3568 /* Copy the found multiple equalities at the current level if needed */
3569 if (left_copyfl)
3570 {
3571 /* left_item_equal of an upper level contains left_item */
3572 left_item_equal= new Item_equal(left_item_equal);
3573 if (left_item_equal == NULL)
3574 return true;
3575 cond_equal->current_level.push_back(left_item_equal);
3576 }
3577 if (right_copyfl)
3578 {
3579 /* right_item_equal of an upper level contains right_item */
3580 right_item_equal= new Item_equal(right_item_equal);
3581 if (right_item_equal == NULL)
3582 return true;
3583 cond_equal->current_level.push_back(right_item_equal);
3584 }
3585
3586 if (left_item_equal)
3587 {
3588 /* left item was found in the current or one of the upper levels */
3589 if (! right_item_equal)
3590 left_item_equal->add(down_cast<Item_field *>(right_item));
3591 else
3592 {
3593 /* Merge two multiple equalities forming a new one */
3594 if (left_item_equal->merge(thd, right_item_equal))
3595 return true;
3596 /* Remove the merged multiple equality from the list */
3597 List_iterator<Item_equal> li(cond_equal->current_level);
3598 while ((li++) != right_item_equal) ;
3599 li.remove();
3600 }
3601 }
3602 else
3603 {
3604 /* left item was not found neither the current nor in upper levels */
3605 if (right_item_equal)
3606 {
3607 right_item_equal->add(down_cast<Item_field *>(left_item));
3608 }
3609 else
3610 {
3611 /* None of the fields was found in multiple equalities */
3612 Item_equal *item_equal=
3613 new Item_equal(down_cast<Item_field *>(left_item),
3614 down_cast<Item_field *>(right_item));
3615 if (item_equal == NULL)
3616 return true;
3617 cond_equal->current_level.push_back(item_equal);
3618 }
3619 }
3620 *simple_equality= true;
3621 return false;
3622 }
3623
3624 {
3625 /* The predicate of the form field=const/const=field is processed */
3626 Item *const_item= 0;
3627 Item_field *field_item= 0;
3628 if (left_item->type() == Item::FIELD_ITEM &&
3629 (field_item= down_cast<Item_field *>(left_item)) &&
3630 field_item->depended_from == NULL &&
3631 right_item->const_item())
3632 {
3633 const_item= right_item;
3634 }
3635 else if (right_item->type() == Item::FIELD_ITEM &&
3636 (field_item= down_cast<Item_field *>(right_item)) &&
3637 field_item->depended_from == NULL &&
3638 left_item->const_item())
3639 {
3640 const_item= left_item;
3641 }
3642
3643 if (const_item &&
3644 field_item->result_type() == const_item->result_type())
3645 {
3646 if (field_item->result_type() == STRING_RESULT)
3647 {
3648 const CHARSET_INFO *cs= field_item->field->charset();
3649 if (!item)
3650 {
3651 Item_func_eq *const eq_item= new Item_func_eq(left_item, right_item);
3652 if (eq_item == NULL || eq_item->set_cmp_func())
3653 return true;
3654 eq_item->quick_fix_field();
3655 item= eq_item;
3656 }
3657 if ((cs != down_cast<Item_func *>(item)->compare_collation()) ||
3658 !cs->coll->propagate(cs, 0, 0))
3659 return false;
3660 }
3661
3662 bool copyfl;
3663 Item_equal *item_equal= find_item_equal(cond_equal, field_item, ©fl);
3664 if (copyfl)
3665 {
3666 item_equal= new Item_equal(item_equal);
3667 if (item_equal == NULL)
3668 return true;
3669 cond_equal->current_level.push_back(item_equal);
3670 }
3671 if (item_equal)
3672 {
3673 /*
3674 The flag cond_false will be set to 1 after this, if item_equal
3675 already contains a constant and its value is not equal to
3676 the value of const_item.
3677 */
3678 if (item_equal->add(thd, const_item, field_item))
3679 return true;
3680 }
3681 else
3682 {
3683 item_equal= new Item_equal(const_item, field_item);
3684 if (item_equal == NULL)
3685 return true;
3686 cond_equal->current_level.push_back(item_equal);
3687 }
3688 *simple_equality= true;
3689 return false;
3690 }
3691 }
3692 return false;
3693 }
3694
3695
3696 /**
3697 Convert row equalities into a conjunction of regular equalities.
3698
3699 The function converts a row equality of the form (E1,...,En)=(E'1,...,E'n)
3700 into a list of equalities E1=E'1,...,En=E'n. For each of these equalities
3701 Ei=E'i the function checks whether it is a simple equality or a row
3702 equality. If it is a simple equality it is used to expand multiple
  equalities of cond_equal. If it is a row equality it is converted to a
  sequence of equalities between row elements. If Ei=E'i is neither a
3705 simple equality nor a row equality the item for this predicate is added
3706 to eq_list.
3707
3708 @param thd thread handle
3709 @param left_row left term of the row equality to be processed
3710 @param right_row right term of the row equality to be processed
3711 @param cond_equal multiple equalities that must hold together with the
3712 predicate
3713 @param eq_list results of conversions of row equalities that are not
3714 simple enough to form multiple equalities
3715 @param[out] simple_equality
3716 true if the row equality is composed of only
3717 simple equalities.
3718
3719 @returns false if conversion succeeded, true if any error.
3720 */
3721
check_row_equality(THD * thd,Item * left_row,Item_row * right_row,COND_EQUAL * cond_equal,List<Item> * eq_list,bool * simple_equality)3722 static bool check_row_equality(THD *thd, Item *left_row, Item_row *right_row,
3723 COND_EQUAL *cond_equal, List<Item>* eq_list,
3724 bool *simple_equality)
3725 {
3726 *simple_equality= false;
3727 uint n= left_row->cols();
3728 for (uint i= 0 ; i < n; i++)
3729 {
3730 bool is_converted;
3731 Item *left_item= left_row->element_index(i);
3732 Item *right_item= right_row->element_index(i);
3733 if (left_item->type() == Item::ROW_ITEM &&
3734 right_item->type() == Item::ROW_ITEM)
3735 {
3736 if (check_row_equality(thd,
3737 down_cast<Item_row *>(left_item),
3738 down_cast<Item_row *>(right_item),
3739 cond_equal, eq_list, &is_converted))
3740 return true;
3741 if (!is_converted)
3742 thd->lex->current_select()->cond_count++;
3743 }
3744 else
3745 {
3746 if (check_simple_equality(thd, left_item, right_item, 0, cond_equal,
3747 &is_converted))
3748 return true;
3749 thd->lex->current_select()->cond_count++;
3750 }
3751
3752 if (!is_converted)
3753 {
3754 Item_func_eq *const eq_item= new Item_func_eq(left_item, right_item);
3755 if (eq_item == NULL)
3756 return true;
3757 if (eq_item->set_cmp_func())
3758 {
3759 // Failed to create cmp func -> not only simple equalitities
3760 return true;
3761 }
3762 eq_item->quick_fix_field();
3763 eq_list->push_back(eq_item);
3764 }
3765 }
3766 *simple_equality= true;
3767 return false;
3768 }
3769
3770
3771 /**
3772 Eliminate row equalities and form multiple equalities predicates.
3773
3774 This function checks whether the item is a simple equality
3775 i.e. the one that equates a field with another field or a constant
3776 (field=field_item or field=constant_item), or, a row equality.
3777 For a simple equality the function looks for a multiple equality
3778 in the lists referenced directly or indirectly by cond_equal inferring
3779 the given simple equality. If it doesn't find any, it builds/expands
3780 multiple equality that covers the predicate.
  Row equalities are eliminated by substituting them with conjunctions of
  regular equalities, which are then treated in the same way as original
  equality predicates.
3784
3785 @param thd thread handle
3786 @param item predicate to process
3787 @param cond_equal multiple equalities that must hold together with the
3788 predicate
3789 @param eq_list results of conversions of row equalities that are not
3790 simple enough to form multiple equalities
3791 @param[out] equality
3792 true if re-writing rules have been applied
3793 false otherwise, i.e.
3794 if the predicate is not an equality, or
3795 if the equality is neither a simple nor a row equality
3796
3797 @returns false if success, true if error
3798
3799 @note If the equality was created by IN->EXISTS, it may be removed later by
3800 subquery materialization. So we don't mix this possibly temporary equality
3801 with others; if we let it go into a multiple-equality (Item_equal), then we
3802 could not remove it later. There is however an exception: if the outer
3803 expression is a constant, it is safe to leave the equality even in
3804 materialization; all it can do is preventing NULL/FALSE distinction but if
3805 such distinction mattered the equality would be in a triggered condition so
3806 we would not come to this function. And injecting constants is good because
3807 it makes the materialized table smaller.
3808 */
3809
check_equality(THD * thd,Item * item,COND_EQUAL * cond_equal,List<Item> * eq_list,bool * equality)3810 static bool check_equality(THD *thd, Item *item, COND_EQUAL *cond_equal,
3811 List<Item> *eq_list, bool *equality)
3812 {
3813 *equality= false;
3814 Item_func *item_func;
3815 if (item->type() == Item::FUNC_ITEM &&
3816 (item_func= down_cast<Item_func *>(item))->functype() ==
3817 Item_func::EQ_FUNC)
3818 {
3819 Item *left_item= item_func->arguments()[0];
3820 Item *right_item= item_func->arguments()[1];
3821
3822 if (item->created_by_in2exists() && !left_item->const_item())
3823 return false; // See note above
3824
3825 if (left_item->type() == Item::ROW_ITEM &&
3826 right_item->type() == Item::ROW_ITEM)
3827 {
3828 thd->lex->current_select()->cond_count--;
3829 return check_row_equality(thd,
3830 down_cast<Item_row *>(left_item),
3831 down_cast<Item_row *>(right_item),
3832 cond_equal, eq_list, equality);
3833 }
3834 else
3835 return check_simple_equality(thd, left_item, right_item, item, cond_equal,
3836 equality);
3837 }
3838
3839 return false;
3840 }
3841
3842
3843 /**
3844 Replace all equality predicates in a condition by multiple equality items.
3845
3846 At each 'and' level the function detects items for equality predicates
3847 and replaces them by a set of multiple equality items of class Item_equal,
3848 taking into account inherited equalities from upper levels.
3849 If an equality predicate is used not in a conjunction it's just
3850 replaced by a multiple equality predicate.
3851 For each 'and' level the function set a pointer to the inherited
3852 multiple equalities in the cond_equal field of the associated
3853 object of the type Item_cond_and.
3854 The function also traverses the cond tree and for each field reference
3855 sets a pointer to the multiple equality item containing the field, if there
3856 is any. If this multiple equality equates fields to a constant the
3857 function replaces the field reference by the constant in the cases
3858 when the field is not of a string type or when the field reference is
3859 just an argument of a comparison predicate.
3860 The function also determines the maximum number of members in
3861 equality lists of each Item_cond_and object assigning it to
3862 thd->lex->current_select()->max_equal_elems.
3863
3864 @note
    Multiple equality predicate =(f1,..fn) is equivalent to the conjunction of
    f1=f2, .., fn-1=fn. It substitutes any inference from these
    equality predicates that is equivalent to the conjunction.
    Thus, =(a1,a2,a3) can substitute for ((a1=a3) AND (a2=a3) AND (a2=a1)) as
    it is equivalent to ((a1=a2) AND (a2=a3)).
    The function always substitutes all equality predicates occurring
    in a conjunction with a minimal set of multiple equality predicates.
3872 This set can be considered as a canonical representation of the
3873 sub-conjunction of the equality predicates.
3874 E.g. (t1.a=t2.b AND t2.b>5 AND t1.a=t3.c) is replaced by
3875 (=(t1.a,t2.b,t3.c) AND t2.b>5), not by
3876 (=(t1.a,t2.b) AND =(t1.a,t3.c) AND t2.b>5);
3877 while (t1.a=t2.b AND t2.b>5 AND t3.c=t4.d) is replaced by
    (=(t1.a,t2.b) AND =(t3.c,t4.d) AND t2.b>5),
3879 but if additionally =(t4.d,t2.b) is inherited, it
3880 will be replaced by (=(t1.a,t2.b,t3.c,t4.d) AND t2.b>5)
3881
3882 The function performs the substitution in a recursive descent of
3883 the condition tree, passing to the next AND level a chain of multiple
3884 equality predicates which have been built at the upper levels.
3885 The Item_equal items built at the level are attached to other
3886 non-equality conjuncts as a sublist. The pointer to the inherited
3887 multiple equalities is saved in the and condition object (Item_cond_and).
    This chain allows us, for any field reference occurrence, to easily find a
    multiple equality that must hold for this occurrence.
3890 For each AND level we do the following:
3891 - scan it for all equality predicate (=) items
3892 - join them into disjoint Item_equal() groups
3893 - process the included OR conditions recursively to do the same for
3894 lower AND levels.
3895
3896 We need to do things in this order as lower AND levels need to know about
3897 all possible Item_equal objects in upper levels.
3898
3899 @param thd thread handle
3900 @param cond condition(expression) where to make replacement
3901 @param[out] retcond returned condition
3902 @param inherited path to all inherited multiple equality items
3903 @param do_inherit whether or not to inherit equalities from other parts
3904 of the condition
3905
3906 @returns false if success, true if error
3907 */
3908
build_equal_items_for_cond(THD * thd,Item * cond,Item ** retcond,COND_EQUAL * inherited,bool do_inherit)3909 static bool build_equal_items_for_cond(THD *thd, Item *cond, Item **retcond,
3910 COND_EQUAL *inherited, bool do_inherit)
3911 {
3912 Item_equal *item_equal;
3913 COND_EQUAL cond_equal;
3914 cond_equal.upper_levels= inherited;
3915
3916 if (check_stack_overrun(thd, STACK_MIN_SIZE, NULL))
3917 return true; // Fatal error flag is set!
3918
3919 const enum Item::Type cond_type= cond->type();
3920 if (cond_type == Item::COND_ITEM)
3921 {
3922 List<Item> eq_list;
3923 Item_cond *const item_cond= down_cast<Item_cond *>(cond);
3924 const bool and_level= item_cond->functype() == Item_func::COND_AND_FUNC;
3925 List<Item> *args= item_cond->argument_list();
3926
3927 List_iterator<Item> li(*args);
3928 Item *item;
3929
3930 if (and_level)
3931 {
3932 /*
3933 Retrieve all conjuncts of this level detecting the equality
3934 that are subject to substitution by multiple equality items and
3935 removing each such predicate from the conjunction after having
3936 found/created a multiple equality whose inference the predicate is.
3937 */
3938 while ((item= li++))
3939 {
3940 /*
3941 PS/SP note: we can safely remove a node from AND-OR
3942 structure here because it's restored before each
3943 re-execution of any prepared statement/stored procedure.
3944 */
3945 bool equality;
3946 if (check_equality(thd, item, &cond_equal, &eq_list, &equality))
3947 return true;
3948 if (equality)
3949 li.remove();
3950 }
3951
3952 /*
3953 Check if we eliminated all the predicates of the level, e.g.
3954 (a=a AND b=b AND a=a).
3955 */
3956 if (!args->elements &&
3957 !cond_equal.current_level.elements &&
3958 !eq_list.elements)
3959 {
3960 *retcond= new Item_int((longlong) 1, 1);
3961 return *retcond == NULL;
3962 }
3963
3964 List_iterator_fast<Item_equal> it(cond_equal.current_level);
3965 while ((item_equal= it++))
3966 {
3967 item_equal->fix_length_and_dec();
3968 item_equal->update_used_tables();
3969 set_if_bigger(thd->lex->current_select()->max_equal_elems,
3970 item_equal->members());
3971 }
3972
3973 Item_cond_and *const item_cond_and= down_cast<Item_cond_and *>(cond);
3974 item_cond_and->cond_equal= cond_equal;
3975 inherited= &item_cond_and->cond_equal;
3976 }
3977 /*
3978 Make replacement of equality predicates for lower levels
3979 of the condition expression.
3980 */
3981 li.rewind();
3982 while ((item= li++))
3983 {
3984 Item *new_item;
3985 if (build_equal_items_for_cond(thd, item, &new_item, inherited,
3986 do_inherit))
3987 return true;
3988 if (new_item != item)
3989 {
3990 /* This replacement happens only for standalone equalities */
3991 /*
3992 This is ok with PS/SP as the replacement is done for
3993 arguments of an AND/OR item, which are restored for each
3994 execution of PS/SP.
3995 */
3996 li.replace(new_item);
3997 }
3998 }
3999 if (and_level)
4000 {
4001 args->concat(&eq_list);
4002 args->concat((List<Item> *)&cond_equal.current_level);
4003 }
4004 }
4005 else if (cond->type() == Item::FUNC_ITEM)
4006 {
4007 List<Item> eq_list;
4008 /*
4009 If an equality predicate forms the whole and level,
4010 we call it standalone equality and it's processed here.
4011 E.g. in the following where condition
4012 WHERE a=5 AND (b=5 or a=c)
4013 (b=5) and (a=c) are standalone equalities.
4014 In general we can't leave alone standalone eqalities:
4015 for WHERE a=b AND c=d AND (b=c OR d=5)
4016 b=c is replaced by =(a,b,c,d).
4017 */
4018 bool equality;
4019 if (check_equality(thd, cond, &cond_equal, &eq_list, &equality))
4020 return true;
4021 if (equality)
4022 {
4023 int n= cond_equal.current_level.elements + eq_list.elements;
4024 if (n == 0)
4025 {
4026 *retcond= new Item_int((longlong) 1,1);
4027 return *retcond == NULL;
4028 }
4029 else if (n == 1)
4030 {
4031 if ((item_equal= cond_equal.current_level.pop()))
4032 {
4033 item_equal->fix_length_and_dec();
4034 item_equal->update_used_tables();
4035 set_if_bigger(thd->lex->current_select()->max_equal_elems,
4036 item_equal->members());
4037 *retcond= item_equal;
4038 return false;
4039 }
4040
4041 *retcond= eq_list.pop();
4042 return false;
4043 }
4044 else
4045 {
4046 /*
4047 Here a new AND level must be created. It can happen only
4048 when a row equality is processed as a standalone predicate.
4049 */
4050 Item_cond_and *and_cond= new Item_cond_and(eq_list);
4051 if (and_cond == NULL)
4052 return true;
4053
4054 and_cond->quick_fix_field();
4055 List<Item> *args= and_cond->argument_list();
4056 List_iterator_fast<Item_equal> it(cond_equal.current_level);
4057 while ((item_equal= it++))
4058 {
4059 item_equal->fix_length_and_dec();
4060 item_equal->update_used_tables();
4061 set_if_bigger(thd->lex->current_select()->max_equal_elems,
4062 item_equal->members());
4063 }
4064 and_cond->cond_equal= cond_equal;
4065 args->concat((List<Item> *)&cond_equal.current_level);
4066
4067 *retcond= and_cond;
4068 return false;
4069 }
4070 }
4071
4072 if (do_inherit)
4073 {
4074 /*
4075 For each field reference in cond, not from equal item predicates,
4076 set a pointer to the multiple equality it belongs to (if there is any)
4077 as soon the field is not of a string type or the field reference is
4078 an argument of a comparison predicate.
4079 */
4080 uchar *is_subst_valid= (uchar *) 1;
4081 cond= cond->compile(&Item::subst_argument_checker,
4082 &is_subst_valid,
4083 &Item::equal_fields_propagator,
4084 (uchar *) inherited);
4085 if (cond == NULL)
4086 return true;
4087 }
4088 cond->update_used_tables();
4089 }
4090 *retcond= cond;
4091 return false;
4092 }
4093
4094
4095 /**
4096 Build multiple equalities for a WHERE condition and all join conditions that
4097 inherit these multiple equalities.
4098
4099 The function first applies the build_equal_items_for_cond function
4100 to build all multiple equalities for condition cond utilizing equalities
4101 referred through the parameter inherited. The extended set of
4102 equalities is returned in the structure referred by the cond_equal_ref
4103 parameter. After this the function calls itself recursively for
4104 all join conditions whose direct references can be found in join_list
4105 and who inherit directly the multiple equalities just having built.
4106
4107 @note
4108 The join condition used in an outer join operation inherits all equalities
4109 from the join condition of the embedding join, if there is any, or
4110 otherwise - from the where condition.
4111 This fact is not obvious, but presumably can be proved.
4112 Consider the following query:
4113 @code
4114 SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t1.a=t3.a AND t2.a=t4.a
4115 WHERE t1.a=t2.a;
4116 @endcode
4117 If the join condition in the query inherits =(t1.a,t2.a), then we
4118 can build the multiple equality =(t1.a,t2.a,t3.a,t4.a) that infers
4119 the equality t3.a=t4.a. Although the join condition
4120 t1.a=t3.a AND t2.a=t4.a AND t3.a=t4.a is not equivalent to the one
4121 in the query the latter can be replaced by the former: the new query
4122 will return the same result set as the original one.
4123
4124 Interesting that multiple equality =(t1.a,t2.a,t3.a,t4.a) allows us
4125 to use t1.a=t3.a AND t3.a=t4.a under the join condition:
4126 @code
4127 SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t1.a=t3.a AND t3.a=t4.a
4128 WHERE t1.a=t2.a
4129 @endcode
    This query is equivalent to:
4131 @code
4132 SELECT * FROM (t1 LEFT JOIN (t3,t4) ON t1.a=t3.a AND t3.a=t4.a),t2
4133 WHERE t1.a=t2.a
4134 @endcode
4135 Similarly the original query can be rewritten to the query:
4136 @code
4137 SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t2.a=t4.a AND t3.a=t4.a
4138 WHERE t1.a=t2.a
4139 @endcode
4140 that is equivalent to:
4141 @code
4142 SELECT * FROM (t2 LEFT JOIN (t3,t4)ON t2.a=t4.a AND t3.a=t4.a), t1
4143 WHERE t1.a=t2.a
4144 @endcode
4145 Thus, applying equalities from the where condition we basically
4146 can get more freedom in performing join operations.
4147 Although we don't use this property now, it probably makes sense to use
4148 it in the future.
4149
4150 @param thd Thread handler
4151 @param cond condition to build the multiple equalities for
4152 @param[out] retcond Returned condition
4153 @param inherited path to all inherited multiple equality items
4154 @param do_inherit whether or not to inherit equalities from other
4155 parts of the condition
4156 @param join_list list of join tables that the condition refers to
4157 @param[out] cond_equal_ref pointer to the structure to place built
4158 equalities in
4159
4160 @returns false if success, true if error
4161 */
4162
build_equal_items(THD * thd,Item * cond,Item ** retcond,COND_EQUAL * inherited,bool do_inherit,List<TABLE_LIST> * join_list,COND_EQUAL ** cond_equal_ref)4163 bool build_equal_items(THD *thd, Item *cond, Item **retcond,
4164 COND_EQUAL *inherited, bool do_inherit,
4165 List<TABLE_LIST> *join_list,
4166 COND_EQUAL **cond_equal_ref)
4167 {
4168 COND_EQUAL *cond_equal= 0;
4169
4170 if (cond)
4171 {
4172 if (build_equal_items_for_cond(thd, cond, &cond, inherited, do_inherit))
4173 return true;
4174 cond->update_used_tables();
4175 const enum Item::Type cond_type= cond->type();
4176 if (cond_type == Item::COND_ITEM &&
4177 down_cast<Item_cond *>(cond)->functype() == Item_func::COND_AND_FUNC)
4178 cond_equal= &down_cast<Item_cond_and *>(cond)->cond_equal;
4179 else if (cond_type == Item::FUNC_ITEM &&
4180 down_cast<Item_func *>(cond)->functype() == Item_func::MULT_EQUAL_FUNC)
4181 {
4182 cond_equal= new COND_EQUAL;
4183 if (cond_equal == NULL)
4184 return true;
4185 cond_equal->current_level.push_back(down_cast<Item_equal *>(cond));
4186 }
4187 }
4188 if (cond_equal)
4189 {
4190 cond_equal->upper_levels= inherited;
4191 inherited= cond_equal;
4192 }
4193 *cond_equal_ref= cond_equal;
4194
4195 if (join_list)
4196 {
4197 TABLE_LIST *table;
4198 List_iterator<TABLE_LIST> li(*join_list);
4199
4200 while ((table= li++))
4201 {
4202 if (table->join_cond_optim())
4203 {
4204 List<TABLE_LIST> *nested_join_list= table->nested_join ?
4205 &table->nested_join->join_list : NULL;
4206 Item *join_cond;
4207 if (build_equal_items(thd, table->join_cond_optim(), &join_cond,
4208 inherited, do_inherit,
4209 nested_join_list, &table->cond_equal))
4210 return true;
4211 table->set_join_cond_optim(join_cond);
4212 }
4213 }
4214 }
4215
4216 *retcond= cond;
4217 return false;
4218 }
4219
4220
4221 /**
4222 Compare field items by table order in the execution plan.
4223
4224 field1 considered as better than field2 if the table containing
4225 field1 is accessed earlier than the table containing field2.
4226 The function finds out what of two fields is better according
4227 this criteria.
4228
4229 @param field1 first field item to compare
4230 @param field2 second field item to compare
4231 @param table_join_idx index to tables determining table order
4232
4233 @retval
4234 -1 if field1 is better than field2
4235 @retval
4236 1 if field2 is better than field1
4237 @retval
4238 0 otherwise
4239 */
4240
compare_fields_by_table_order(Item_field * field1,Item_field * field2,void * table_join_idx)4241 static int compare_fields_by_table_order(Item_field *field1,
4242 Item_field *field2,
4243 void *table_join_idx)
4244 {
4245 int cmp= 0;
4246 bool outer_ref= 0;
4247 if (field1->used_tables() & OUTER_REF_TABLE_BIT)
4248 {
4249 outer_ref= 1;
4250 cmp= -1;
4251 }
4252 if (field2->used_tables() & OUTER_REF_TABLE_BIT)
4253 {
4254 outer_ref= 1;
4255 cmp++;
4256 }
4257 if (outer_ref)
4258 return cmp;
4259 JOIN_TAB **idx= (JOIN_TAB **) table_join_idx;
4260
4261 /*
4262 idx is NULL if this function was not called from JOIN::optimize()
4263 but from e.g. mysql_delete() or mysql_update(). In these cases
4264 there is only one table and both fields belong to it. Example
4265 condition where this is the case: t1.fld1=t1.fld2
4266 */
4267 if (!idx)
4268 return 0;
4269
4270 // Locate JOIN_TABs thanks to table_join_idx, then compare their index.
4271 cmp= idx[field1->table_ref->tableno()]->idx() -
4272 idx[field2->table_ref->tableno()]->idx();
4273 return cmp < 0 ? -1 : (cmp ? 1 : 0);
4274 }
4275
4276
4277 /**
4278 Generate minimal set of simple equalities equivalent to a multiple equality.
4279
4280 The function retrieves the fields of the multiple equality item
4281 item_equal and for each field f:
4282 - if item_equal contains const it generates the equality f=const_item;
4283 - otherwise, if f is not the first field, generates the equality
4284 f=item_equal->get_first().
4285 All generated equality are added to the cond conjunction.
4286
4287 @param cond condition to add the generated equality to
4288 @param upper_levels structure to access multiple equality of upper levels
4289 @param item_equal multiple equality to generate simple equality from
4290
4291 @note
4292 Before generating an equality function checks that it has not
4293 been generated for multiple equalities of the upper levels.
4294 E.g. for the following where condition
4295 WHERE a=5 AND ((a=b AND b=c) OR c>4)
4296 the upper level AND condition will contain =(5,a),
4297 while the lower level AND condition will contain =(5,a,b,c).
4298 When splitting =(5,a,b,c) into a separate equality predicates
4299 we should omit 5=a, as we have it already in the upper level.
4300 The following where condition gives us a more complicated case:
4301 WHERE t1.a=t2.b AND t3.c=t4.d AND (t2.b=t3.c OR t4.e>5 ...) AND ...
4302 Given the tables are accessed in the order t1->t2->t3->t4 for
4303 the selected query execution plan the lower level multiple
4304 equality =(t1.a,t2.b,t3.c,t4.d) formally should be converted to
    t1.a=t2.b AND t1.a=t3.c AND t1.a=t4.d. But t1.a=t2.b will be
    generated for the upper level. Also t3.c=t4.d will be generated there.
    So only t1.a=t3.c should be left in the lower level.
    If cond is equal to 0, then not more than one equality is generated
    and a pointer to it is returned as the result of the function.
4310
4311 @return
4312 - The condition with generated simple equalities or
4313 a pointer to the simple generated equality, if success.
4314 - 0, otherwise.
4315 */
4316
eliminate_item_equal(Item * cond,COND_EQUAL * upper_levels,Item_equal * item_equal)4317 static Item *eliminate_item_equal(Item *cond, COND_EQUAL *upper_levels,
4318 Item_equal *item_equal)
4319 {
4320 List<Item> eq_list;
4321 Item_func_eq *eq_item= NULL;
4322 if (((Item *) item_equal)->const_item() && !item_equal->val_int())
4323 return new Item_int((longlong) 0,1);
4324 Item *const item_const= item_equal->get_const();
4325 Item_equal_iterator it(*item_equal);
4326 if (!item_const)
4327 {
4328 /*
4329 If there is a const item, match all field items with the const item,
4330 otherwise match the second and subsequent field items with the first one:
4331 */
4332 it++;
4333 }
4334 Item_field *item_field; // Field to generate equality for.
4335 while ((item_field= it++))
4336 {
4337 /*
4338 Generate an equality of the form:
4339 item_field = some previous field in item_equal's list.
4340
4341 First see if we really need to generate it:
4342 */
4343 Item_equal *const upper= item_field->find_item_equal(upper_levels);
4344 if (upper) // item_field is in this upper equality
4345 {
4346 if (item_const && upper->get_const())
4347 continue; // Const at both levels, no need to generate at current level
4348 /*
4349 If the upper-level multiple equality contains this item, there is no
4350 need to generate the equality, unless item_field belongs to a
4351 semi-join nest that is used for Materialization, and refers to tables
4352 that are outside of the materialized semi-join nest,
4353 As noted in Item_equal::get_subst_item(), subquery materialization
4354 does not have this problem.
4355 */
4356 JOIN_TAB *const tab= item_field->field->table->reginfo.join_tab;
4357
4358 if (!(tab && sj_is_materialize_strategy(tab->get_sj_strategy())))
4359 {
4360 Item_field *item_match;
4361 Item_equal_iterator li(*item_equal);
4362 while ((item_match= li++) != item_field)
4363 {
4364 if (item_match->find_item_equal(upper_levels) == upper)
4365 break; // (item_match, item_field) is also in upper level equality
4366 }
4367 if (item_match != item_field)
4368 continue;
4369 }
4370 } // ... if (upper).
4371
4372 /*
4373 item_field should be compared with the head of the multiple equality
4374 list.
4375 item_field may refer to a table that is within a semijoin materialization
4376 nest. In that case, the order of the join_tab entries may look like:
4377
4378 ot1 ot2 <subquery> ot5 SJM(it3 it4)
4379
4380 If we have a multiple equality
4381
4382 (ot1.c1, ot2.c2, <subquery>.c it3.c3, it4.c4, ot5.c5),
4383
4384 we should generate the following equalities:
4385 1. ot1.c1 = ot2.c2
4386 2. ot1.c1 = <subquery>.c
4387 3. it3.c3 = it4.c4
4388 4. ot1.c1 = ot5.c5
4389
4390 Equalities 1) and 4) are regular equalities between two outer tables.
4391 Equality 2) is an equality that matches the outer query with a
4392 materialized temporary table. It is either performed as a lookup
4393 into the materialized table (SJM-lookup), or as a condition on the
4394 outer table (SJM-scan).
4395 Equality 3) is evaluated during semijoin materialization.
4396
4397 If there is a const item, match against this one.
4398 Otherwise, match against the first field item in the multiple equality,
4399 unless the item is within a materialized semijoin nest, in case it will
4400 be matched against the first item within the SJM nest.
4401 @see JOIN::set_prefix_tables()
4402 @see Item_equal::get_subst_item()
4403 */
4404
4405 Item *const head=
4406 item_const ? item_const : item_equal->get_subst_item(item_field);
4407 if (head == item_field)
4408 continue;
4409
4410 // we have a pair, can generate 'item_field=head'
4411 if (eq_item)
4412 eq_list.push_back(eq_item);
4413
4414 eq_item= new Item_func_eq(item_field, head);
4415 if (!eq_item || eq_item->set_cmp_func())
4416 return NULL;
4417 eq_item->quick_fix_field();
4418 } // ... while ((item_field= it++))
4419
4420 if (!cond && !eq_list.head())
4421 {
4422 if (!eq_item)
4423 return new Item_int((longlong) 1,1);
4424 return eq_item;
4425 }
4426
4427 if (eq_item)
4428 eq_list.push_back(eq_item);
4429 if (!cond)
4430 cond= new Item_cond_and(eq_list);
4431 else
4432 {
4433 assert(cond->type() == Item::COND_ITEM);
4434 if (eq_list.elements)
4435 ((Item_cond *) cond)->add_at_head(&eq_list);
4436 }
4437
4438 cond->quick_fix_field();
4439 cond->update_used_tables();
4440
4441 return cond;
4442 }
4443
4444
4445 /**
4446 Substitute every field reference in a condition by the best equal field
4447 and eliminate all multiple equality predicates.
4448
4449 The function retrieves the cond condition and for each encountered
4450 multiple equality predicate it sorts the field references in it
4451 according to the order of tables specified by the table_join_idx
  parameter. Then it eliminates the multiple equality predicate by
  replacing it with the conjunction of simple equality predicates
  equating every field from the multiple equality to the first
  field in it, or to the constant, if there is any.
  After this the function retrieves all other conjuncted
  predicates and substitutes every field reference by the field reference
  to the first equal field or equal constant if there are any.
4459
4460 @param cond condition to process
4461 @param cond_equal multiple equalities to take into consideration
4462 @param table_join_idx index to tables determining field preference
4463
4464 @note
4465 At the first glance full sort of fields in multiple equality
4466 seems to be an overkill. Yet it's not the case due to possible
4467 new fields in multiple equality item of lower levels. We want
4468 the order in them to comply with the order of upper levels.
4469
4470 @return
4471 The transformed condition, or NULL in case of error
4472 */
4473
substitute_for_best_equal_field(Item * cond,COND_EQUAL * cond_equal,void * table_join_idx)4474 Item* substitute_for_best_equal_field(Item *cond,
4475 COND_EQUAL *cond_equal,
4476 void *table_join_idx)
4477 {
4478 Item_equal *item_equal;
4479
4480 if (cond->type() == Item::COND_ITEM)
4481 {
4482 List<Item> *cond_list= ((Item_cond*) cond)->argument_list();
4483
4484 bool and_level= ((Item_cond*) cond)->functype() ==
4485 Item_func::COND_AND_FUNC;
4486 if (and_level)
4487 {
4488 cond_equal= &((Item_cond_and *) cond)->cond_equal;
4489 cond_list->disjoin((List<Item> *) &cond_equal->current_level);
4490
4491 List_iterator_fast<Item_equal> it(cond_equal->current_level);
4492 while ((item_equal= it++))
4493 {
4494 item_equal->sort(&compare_fields_by_table_order, table_join_idx);
4495 }
4496 }
4497
4498 List_iterator<Item> li(*cond_list);
4499 Item *item;
4500 while ((item= li++))
4501 {
4502 Item *new_item= substitute_for_best_equal_field(item, cond_equal,
4503 table_join_idx);
4504 if (new_item == NULL)
4505 return NULL;
4506 /*
4507 This works OK with PS/SP re-execution as changes are made to
4508 the arguments of AND/OR items only
4509 */
4510 if (new_item != item)
4511 li.replace(new_item);
4512 }
4513
4514 if (and_level)
4515 {
4516 List_iterator_fast<Item_equal> it(cond_equal->current_level);
4517 while ((item_equal= it++))
4518 {
4519 cond= eliminate_item_equal(cond, cond_equal->upper_levels, item_equal);
4520 if (cond == NULL)
4521 return NULL;
4522 // This occurs when eliminate_item_equal() founds that cond is
4523 // always false and substitutes it with Item_int 0.
4524 // Due to this, value of item_equal will be 0, so just return it.
4525 if (cond->type() != Item::COND_ITEM)
4526 break;
4527 }
4528 }
4529 if (cond->type() == Item::COND_ITEM &&
4530 !((Item_cond*)cond)->argument_list()->elements)
4531 cond= new Item_int((int32)cond->val_bool());
4532
4533 }
4534 else if (cond->type() == Item::FUNC_ITEM &&
4535 ((Item_cond*) cond)->functype() == Item_func::MULT_EQUAL_FUNC)
4536 {
4537 item_equal= (Item_equal *) cond;
4538 item_equal->sort(&compare_fields_by_table_order, table_join_idx);
4539 if (cond_equal && cond_equal->current_level.head() == item_equal)
4540 cond_equal= cond_equal->upper_levels;
4541 return eliminate_item_equal(0, cond_equal, item_equal);
4542 }
4543 else
4544 cond->transform(&Item::replace_equal_field, 0);
4545 return cond;
4546 }
4547
4548
4549 /**
4550 change field = field to field = const for each found field = const in the
4551 and_level
4552
4553 @param thd Thread handler
4554 @param save_list
4555 @param and_father
4556 @param cond Condition where fields are replaced with constant values
4557 @param field The field that will be substituted
4558 @param value The substitution value
4559
4560 @returns false if success, true if error
4561 */
4562
4563 static bool
change_cond_ref_to_const(THD * thd,I_List<COND_CMP> * save_list,Item * and_father,Item * cond,Item * field,Item * value)4564 change_cond_ref_to_const(THD *thd, I_List<COND_CMP> *save_list,
4565 Item *and_father, Item *cond,
4566 Item *field, Item *value)
4567 {
4568 if (cond->type() == Item::COND_ITEM)
4569 {
4570 Item_cond *const item_cond= down_cast<Item_cond *>(cond);
4571 bool and_level= item_cond->functype() == Item_func::COND_AND_FUNC;
4572 List_iterator<Item> li(*item_cond->argument_list());
4573 Item *item;
4574 while ((item=li++))
4575 {
4576 if (change_cond_ref_to_const(thd, save_list,
4577 and_level ? cond : item,
4578 item, field, value))
4579 return true;
4580 }
4581 return false;
4582 }
4583 if (cond->eq_cmp_result() == Item::COND_OK)
4584 return false; // Not a boolean function
4585
4586 Item_bool_func2 *func= down_cast<Item_bool_func2 *>(cond);
4587 Item **args= func->arguments();
4588 Item *left_item= args[0];
4589 Item *right_item= args[1];
4590 Item_func::Functype functype= func->functype();
4591
4592 if (right_item->eq(field,0) && left_item != value &&
4593 right_item->cmp_context == field->cmp_context &&
4594 (left_item->result_type() != STRING_RESULT ||
4595 value->result_type() != STRING_RESULT ||
4596 left_item->collation.collation == value->collation.collation))
4597 {
4598 Item *const clone= value->clone_item();
4599 if (thd->is_error())
4600 return true;
4601
4602 if (clone == NULL)
4603 return false;
4604
4605 clone->collation.set(right_item->collation);
4606 thd->change_item_tree(args + 1, clone);
4607 func->update_used_tables();
4608 if ((functype == Item_func::EQ_FUNC ||
4609 functype == Item_func::EQUAL_FUNC) &&
4610 and_father != cond && !left_item->const_item())
4611 {
4612 cond->marker=1;
4613 COND_CMP *const cond_cmp= new COND_CMP(and_father,func);
4614 if (cond_cmp == NULL)
4615 return true;
4616
4617 save_list->push_back(cond_cmp);
4618
4619 }
4620 if (func->set_cmp_func())
4621 return true;
4622 }
4623 else if (left_item->eq(field,0) && right_item != value &&
4624 left_item->cmp_context == field->cmp_context &&
4625 (right_item->result_type() != STRING_RESULT ||
4626 value->result_type() != STRING_RESULT ||
4627 right_item->collation.collation == value->collation.collation))
4628 {
4629 Item *const clone= value->clone_item();
4630 if (thd->is_error())
4631 return true;
4632
4633 if (clone == NULL)
4634 return false;
4635
4636 clone->collation.set(left_item->collation);
4637 thd->change_item_tree(args, clone);
4638 value= clone;
4639 func->update_used_tables();
4640 if ((functype == Item_func::EQ_FUNC ||
4641 functype == Item_func::EQUAL_FUNC) &&
4642 and_father != cond && !right_item->const_item())
4643 {
4644 args[0]= args[1]; // For easy check
4645 thd->change_item_tree(args + 1, value);
4646 cond->marker=1;
4647 COND_CMP *const cond_cmp= new COND_CMP(and_father,func);
4648 if (cond_cmp == NULL)
4649 return true;
4650
4651 save_list->push_back(cond_cmp);
4652 }
4653 if (func->set_cmp_func())
4654 return true;
4655 }
4656 return false;
4657 }
4658
4659 /**
4660 Propagate constant values in a condition
4661
4662 @param thd Thread handler
4663 @param save_list
4664 @param and_father
4665 @param cond Condition for which constant values are propagated
4666
4667 @returns false if success, true if error
4668 */
4669 static bool
propagate_cond_constants(THD * thd,I_List<COND_CMP> * save_list,Item * and_father,Item * cond)4670 propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list,
4671 Item *and_father, Item *cond)
4672 {
4673 if (cond->type() == Item::COND_ITEM)
4674 {
4675 Item_cond *const item_cond= down_cast<Item_cond *>(cond);
4676 bool and_level= item_cond->functype() == Item_func::COND_AND_FUNC;
4677 List_iterator_fast<Item> li(*item_cond->argument_list());
4678 Item *item;
4679 I_List<COND_CMP> save;
4680 while ((item=li++))
4681 {
4682 if (propagate_cond_constants(thd, &save, and_level ? cond : item, item))
4683 return true;
4684 }
4685 if (and_level)
4686 { // Handle other found items
4687 I_List_iterator<COND_CMP> cond_itr(save);
4688 COND_CMP *cond_cmp;
4689 while ((cond_cmp= cond_itr++))
4690 {
4691 Item **args= cond_cmp->cmp_func->arguments();
4692 if (!args[0]->const_item() &&
4693 change_cond_ref_to_const(thd, &save, cond_cmp->and_level,
4694 cond_cmp->and_level, args[0], args[1]))
4695 return true;
4696 }
4697 }
4698 }
4699 else if (and_father != cond && !cond->marker) // In a AND group
4700 {
4701 Item_func *func;
4702 if (cond->type() == Item::FUNC_ITEM &&
4703 (func= down_cast<Item_func *>(cond)) &&
4704 (func->functype() == Item_func::EQ_FUNC ||
4705 func->functype() == Item_func::EQUAL_FUNC))
4706 {
4707 Item **args= func->arguments();
4708 bool left_const= args[0]->const_item();
4709 bool right_const= args[1]->const_item();
4710 if (!(left_const && right_const) &&
4711 args[0]->result_type() == args[1]->result_type())
4712 {
4713 if (right_const)
4714 {
4715 if (resolve_const_item(thd, &args[1], args[0]))
4716 return true;
4717 func->update_used_tables();
4718 if (change_cond_ref_to_const(thd, save_list, and_father, and_father,
4719 args[0], args[1]))
4720 return true;
4721 }
4722 else if (left_const)
4723 {
4724 if (resolve_const_item(thd, &args[0], args[1]))
4725 return true;
4726 func->update_used_tables();
4727 if (change_cond_ref_to_const(thd, save_list, and_father, and_father,
4728 args[1], args[0]))
4729 return true;
4730 }
4731 }
4732 }
4733 }
4734
4735 return false;
4736 }
4737
4738
4739 /**
4740 Assign each nested join structure a bit in nested_join_map.
4741
4742 @param join_list List of tables
4743 @param first_unused Number of first unused bit in nested_join_map before the
4744 call
4745
4746 @note
4747 This function is called after simplify_joins(), when there are no
4748 redundant nested joins.
4749 We cannot have more nested joins in a query block than there are tables,
4750 so as long as the number of bits in nested_join_map is not less than the
4751 maximum number of tables in a query block, nested_join_map can never
4752 overflow.
4753
4754 @return
4755 First unused bit in nested_join_map after the call.
4756 */
4757
build_bitmap_for_nested_joins(List<TABLE_LIST> * join_list,uint first_unused)4758 uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list,
4759 uint first_unused)
4760 {
4761 List_iterator<TABLE_LIST> li(*join_list);
4762 TABLE_LIST *table;
4763 DBUG_ENTER("build_bitmap_for_nested_joins");
4764 while ((table= li++))
4765 {
4766 NESTED_JOIN *nested_join;
4767 if ((nested_join= table->nested_join))
4768 {
4769 // We should have either a join condition or a semi-join condition
4770 assert((table->join_cond() == NULL) == (table->sj_cond() != NULL));
4771
4772 nested_join->nj_map= 0;
4773 nested_join->nj_total= 0;
4774 /*
4775 We only record nested join information for outer join nests.
4776 Tables belonging in semi-join nests are recorded in the
4777 embedding outer join nest, if one exists.
4778 */
4779 if (table->join_cond())
4780 {
4781 assert(first_unused < sizeof(nested_join_map)*8);
4782 nested_join->nj_map= (nested_join_map) 1 << first_unused++;
4783 nested_join->nj_total= nested_join->join_list.elements;
4784 }
4785 else if (table->sj_cond())
4786 {
4787 NESTED_JOIN *const outer_nest=
4788 table->embedding ? table->embedding->nested_join : NULL;
4789 /*
4790 The semi-join nest has already been counted into the table count
4791 for the outer join nest as one table, so subtract 1 from the
4792 table count.
4793 */
4794 if (outer_nest)
4795 outer_nest->nj_total+= (nested_join->join_list.elements - 1);
4796 }
4797 else
4798 assert(false);
4799
4800 first_unused= build_bitmap_for_nested_joins(&nested_join->join_list,
4801 first_unused);
4802 }
4803 }
4804 DBUG_RETURN(first_unused);
4805 }
4806
4807
4808 /** Update the dependency map for the tables. */
4809
update_depend_map()4810 void JOIN::update_depend_map()
4811 {
4812 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
4813 for (uint tableno = 0; tableno < tables; tableno++)
4814 {
4815 JOIN_TAB *const tab= best_ref[tableno];
4816 TABLE_REF *const ref= &tab->ref();
4817 table_map depend_map= 0;
4818 Item **item= ref->items;
4819 for (uint i = 0; i < ref->key_parts; i++, item++)
4820 depend_map|= (*item)->used_tables();
4821 depend_map&= ~PSEUDO_TABLE_BITS;
4822 ref->depend_map= depend_map;
4823 for (JOIN_TAB **tab2= map2table; depend_map; tab2++, depend_map >>= 1)
4824 {
4825 if (depend_map & 1)
4826 ref->depend_map|= (*tab2)->ref().depend_map;
4827 }
4828 }
4829 }
4830
4831
4832 /** Update the dependency map for the sort order. */
4833
update_depend_map(ORDER * order)4834 void JOIN::update_depend_map(ORDER *order)
4835 {
4836 for (; order ; order=order->next)
4837 {
4838 table_map depend_map;
4839 order->item[0]->update_used_tables();
4840 order->depend_map= depend_map=
4841 order->item[0]->used_tables() & ~PARAM_TABLE_BIT;
4842 order->used= 0;
4843 // Not item_sum(), RAND() and no reference to table outside of sub select
4844 if (!(order->depend_map & (OUTER_REF_TABLE_BIT | RAND_TABLE_BIT))
4845 && !order->item[0]->with_sum_func)
4846 {
4847 for (JOIN_TAB **tab= map2table; depend_map; tab++, depend_map >>= 1)
4848 {
4849 if (depend_map & 1)
4850 order->depend_map|=(*tab)->ref().depend_map;
4851 }
4852 }
4853 }
4854 }
4855
4856
4857 /**
4858 Update equalities and keyuse references after semi-join materialization
4859 strategy is chosen.
4860
4861 @details
4862 For each multiple equality that contains a field that is selected
4863 from a subquery, and that subquery is executed using a semi-join
4864 materialization strategy, add the corresponding column in the materialized
4865 temporary table to the equality.
4866 For each injected semi-join equality that is not converted to
4867 multiple equality, replace the reference to the expression selected
4868 from the subquery with the corresponding column in the temporary table.
4869
4870 This is needed to properly reflect the equalities that involve injected
4871 semi-join equalities when materialization strategy is chosen.
4872 @see eliminate_item_equal() for how these equalities are used to generate
4873 correct equality predicates.
4874
4875 The MaterializeScan semi-join strategy requires some additional processing:
4876 All primary tables after the materialized temporary table must be inspected
4877 for keyuse objects that point to expressions from the subquery tables.
4878 These references must be replaced with references to corresponding columns
4879 in the materialized temporary table instead. Those primary tables using
4880 ref access will thus be made to depend on the materialized temporary table
4881 instead of the subquery tables.
4882
4883 Only the injected semi-join equalities need this treatment, other predicates
4884 will be handled correctly by the regular item substitution process.
4885
4886 @return False if success, true if error
4887 */
4888
update_equalities_for_sjm()4889 bool JOIN::update_equalities_for_sjm()
4890 {
4891 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
4892 List_iterator<Semijoin_mat_exec> it(sjm_exec_list);
4893 Semijoin_mat_exec *sjm_exec;
4894 while ((sjm_exec= it++))
4895 {
4896 TABLE_LIST *const sj_nest= sjm_exec->sj_nest;
4897
4898 assert(!sj_nest->outer_join_nest());
4899 /*
4900 A materialized semi-join nest cannot actually be an inner part of an
4901 outer join yet, this is just a preparatory step,
4902 ie sj_nest->outer_join_nest() is always NULL here.
4903 @todo: Enable outer joining here later.
4904 */
4905 Item *cond= sj_nest->outer_join_nest() ?
4906 sj_nest->outer_join_nest()->join_cond_optim() : where_cond;
4907 if (!cond)
4908 continue;
4909
4910 uchar *dummy= NULL;
4911 cond= cond->compile(&Item::equality_substitution_analyzer, &dummy,
4912 &Item::equality_substitution_transformer,
4913 (uchar *)sj_nest);
4914 if (cond == NULL)
4915 return true;
4916
4917 cond->update_used_tables();
4918
4919 // Loop over all primary tables that follow the materialized table
4920 for (uint j= sjm_exec->mat_table_index + 1; j < primary_tables; j++)
4921 {
4922 JOIN_TAB *const tab= best_ref[j];
4923 for (Key_use *keyuse= tab->position()->key;
4924 keyuse && keyuse->table_ref == tab->table_ref &&
4925 keyuse->key == tab->position()->key->key;
4926 keyuse++)
4927 {
4928 List_iterator<Item> it(sj_nest->nested_join->sj_inner_exprs);
4929 Item *old;
4930 uint fieldno= 0;
4931 while ((old= it++))
4932 {
4933 if (old->real_item()->eq(keyuse->val->real_item(), false))
4934 {
4935 /*
4936 Replace the expression selected from the subquery with the
4937 corresponding column of the materialized temporary table.
4938 */
4939 keyuse->val= sj_nest->nested_join->sjm.mat_fields[fieldno];
4940 keyuse->used_tables= keyuse->val->used_tables();
4941 break;
4942 }
4943 fieldno++;
4944 }
4945 }
4946 }
4947 }
4948
4949 return false;
4950 }
4951
4952
4953 /**
4954 Assign set of available (prefix) tables to all tables in query block.
4955 Also set added tables, ie the tables added in each JOIN_TAB compared to the
4956 previous JOIN_TAB.
4957 This function must be called for every query block after the table order
4958 has been determined.
4959 */
4960
set_prefix_tables()4961 void JOIN::set_prefix_tables()
4962 {
4963 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
4964 assert(!plan_is_const());
4965 /*
4966 The const tables are available together with the first non-const table in
4967 the join order.
4968 */
4969 table_map const initial_tables_map= const_table_map |
4970 (allow_outer_refs ? OUTER_REF_TABLE_BIT : 0);
4971
4972 table_map current_tables_map= initial_tables_map;
4973 table_map prev_tables_map= (table_map) 0;
4974 table_map saved_tables_map= (table_map) 0;
4975
4976 JOIN_TAB *last_non_sjm_tab= NULL; // Track the last non-sjm table
4977
4978 for (uint i= const_tables; i < tables; i++)
4979 {
4980 JOIN_TAB *const tab= best_ref[i];
4981 if (!tab->table())
4982 continue;
4983 /*
4984 Tables that are within SJ-Materialization nests cannot have their
4985 conditions referring to preceding non-const tables.
4986 - If we're looking at the first SJM table, reset current_tables_map
4987 to refer to only allowed tables
4988 @see Item_equal::get_subst_item()
4989 @see eliminate_item_equal()
4990 */
4991 if (sj_is_materialize_strategy(tab->get_sj_strategy()))
4992 {
4993 const table_map sjm_inner_tables= tab->emb_sj_nest->sj_inner_tables;
4994 if (!(sjm_inner_tables & current_tables_map))
4995 {
4996 saved_tables_map= current_tables_map;
4997 current_tables_map= initial_tables_map;
4998 prev_tables_map= (table_map) 0;
4999 }
5000
5001 current_tables_map|= tab->table_ref->map();
5002 tab->set_prefix_tables(current_tables_map, prev_tables_map);
5003 prev_tables_map= current_tables_map;
5004
5005 if (!(sjm_inner_tables & ~current_tables_map))
5006 {
5007 /*
5008 At the end of a semi-join materialization nest,
5009 add non-deterministic expressions to the last table of the nest:
5010 */
5011 tab->add_prefix_tables(RAND_TABLE_BIT);
5012
5013 // Restore the previous map:
5014 current_tables_map= saved_tables_map;
5015 prev_tables_map= last_non_sjm_tab ?
5016 last_non_sjm_tab->prefix_tables() : (table_map) 0;
5017 }
5018 }
5019 else
5020 {
5021 last_non_sjm_tab= tab;
5022 current_tables_map|= tab->table_ref->map();
5023 tab->set_prefix_tables(current_tables_map, prev_tables_map);
5024 prev_tables_map= current_tables_map;
5025 }
5026 }
5027 /*
5028 Non-deterministic expressions must be added to the last table's condition.
5029 It solves problem with queries like SELECT * FROM t1 WHERE rand() > 0.5
5030 */
5031 if (last_non_sjm_tab != NULL)
5032 last_non_sjm_tab->add_prefix_tables(RAND_TABLE_BIT);
5033 }
5034
5035
5036 /**
5037 Calculate best possible join order and initialize the join structure.
5038
5039 @return true if success, false if error.
5040
5041 The JOIN object is populated with statistics about the query,
5042 and a plan with table order and access method selection is made.
5043
5044 The list of tables to be optimized is taken from select_lex->leaf_tables.
5045 JOIN::where_cond is also used in the optimization.
5046 As a side-effect, JOIN::keyuse_array is populated with key_use information.
5047
5048 Here is an overview of the logic of this function:
5049
5050 - Initialize JOIN data structures and setup basic dependencies between tables.
5051
5052 - Update dependencies based on join information.
5053
5054 - Make key descriptions (update_ref_and_keys()).
5055
5056 - Pull out semi-join tables based on table dependencies.
5057
5058 - Extract tables with zero or one rows as const tables.
5059
5060 - Read contents of const tables, substitute columns from these tables with
5061 actual data. Also keep track of empty tables vs. one-row tables.
5062
5063 - After const table extraction based on row count, more tables may
5064 have become functionally dependent. Extract these as const tables.
5065
5066 - Add new sargable predicates based on retrieved const values.
5067
5068 - Calculate number of rows to be retrieved from each table.
5069
5070 - Calculate cost of potential semi-join materializations.
5071
5072 - Calculate best possible join order based on available statistics.
5073
5074 - Fill in remaining information for the generated join order.
5075 */
5076
make_join_plan()5077 bool JOIN::make_join_plan()
5078 {
5079 DBUG_ENTER("JOIN::make_join_plan");
5080
5081 SARGABLE_PARAM *sargables= NULL;
5082
5083 Opt_trace_context * const trace= &thd->opt_trace;
5084
5085 if (init_planner_arrays()) // Create and initialize the arrays
5086 DBUG_RETURN(true);
5087
5088 // Outer join dependencies were initialized above, now complete the analysis.
5089 if (select_lex->outer_join)
5090 propagate_dependencies();
5091
5092 if (unlikely(trace->is_started()))
5093 trace_table_dependencies(trace, join_tab, primary_tables);
5094
5095 // Build the key access information, which is the basis for ref access.
5096 if (where_cond || select_lex->outer_join)
5097 {
5098 if (update_ref_and_keys(thd, &keyuse_array, join_tab, tables, where_cond,
5099 cond_equal, ~select_lex->outer_join, select_lex,
5100 &sargables))
5101 DBUG_RETURN(true);
5102 }
5103
5104 /*
5105 Pull out semi-join tables based on dependencies. Dependencies are valid
5106 throughout the lifetime of a query, so this operation can be performed
5107 on the first optimization only.
5108 */
5109 if (!select_lex->sj_pullout_done && select_lex->sj_nests.elements &&
5110 pull_out_semijoin_tables(this))
5111 DBUG_RETURN(true);
5112
5113 select_lex->sj_pullout_done= true;
5114 const uint sj_nests= select_lex->sj_nests.elements; // Changed by pull-out
5115
5116 if (!(select_lex->active_options() & OPTION_NO_CONST_TABLES))
5117 {
5118 // Detect tables that are const (0 or 1 row) and read their contents.
5119 if (extract_const_tables())
5120 DBUG_RETURN(true);
5121
5122 // Detect tables that are functionally dependent on const values.
5123 if (extract_func_dependent_tables())
5124 DBUG_RETURN(true);
5125 }
5126 // Possibly able to create more sargable predicates from const rows.
5127 if (const_tables && sargables)
5128 update_sargable_from_const(sargables);
5129
5130 // Make a first estimate of the fanout for each table in the query block.
5131 if (estimate_rowcount())
5132 DBUG_RETURN(true);
5133
5134 if (sj_nests)
5135 {
5136 set_semijoin_embedding();
5137 select_lex->update_semijoin_strategies(thd);
5138 }
5139
5140 if (!plan_is_const())
5141 optimize_keyuse();
5142
5143 allow_outer_refs= true;
5144
5145 if (sj_nests && optimize_semijoin_nests_for_materialization(this))
5146 DBUG_RETURN(true);
5147
5148 // Choose the table order based on analysis done so far.
5149 if (Optimize_table_order(thd, this, NULL).choose_table_order())
5150 DBUG_RETURN(true);
5151
5152 DBUG_EXECUTE_IF("bug13820776_1", thd->killed= THD::KILL_QUERY;);
5153 if (thd->killed || thd->is_error())
5154 DBUG_RETURN(true);
5155
5156 // If this is a subquery, decide between In-to-exists and materialization
5157 if (unit->item && decide_subquery_strategy())
5158 DBUG_RETURN(true);
5159
5160 refine_best_rowcount();
5161
5162 if (!(thd->variables.option_bits & OPTION_BIG_SELECTS) &&
5163 best_read > (double) thd->variables.max_join_size &&
5164 !thd->lex->is_explain())
5165 { /* purecov: inspected */
5166 my_message(ER_TOO_BIG_SELECT, ER(ER_TOO_BIG_SELECT), MYF(0));
5167 error= -1;
5168 DBUG_RETURN(1);
5169 }
5170
5171 positions= NULL; // But keep best_positions for get_best_combination
5172
5173 /*
5174 Store the cost of this query into a user variable
5175 Don't update m_current_query_cost for statements that are not "flat joins" :
5176 i.e. they have subqueries, unions or call stored procedures.
5177 TODO: calculate a correct cost for a query with subqueries and UNIONs.
5178 */
5179 if (thd->lex->is_single_level_stmt())
5180 thd->m_current_query_cost= best_read;
5181
5182 // Generate an execution plan from the found optimal join order.
5183 if (get_best_combination())
5184 DBUG_RETURN(true);
5185
5186 // Cleanup after update_ref_and_keys has added keys for derived tables.
5187 if (select_lex->materialized_derived_table_count)
5188 drop_unused_derived_keys();
5189
5190 // No need for this struct after new JOIN_TAB array is set up.
5191 best_positions= NULL;
5192
5193 // Some called function may still set error status unnoticed
5194 if (thd->is_error())
5195 DBUG_RETURN(true);
5196
5197 // There is at least one empty const table
5198 if (const_table_map != found_const_table_map)
5199 zero_result_cause= "no matching row in const table";
5200
5201 DBUG_RETURN(false);
5202 }
5203
5204
5205 /**
5206 Initialize scratch arrays for the join order optimization
5207
5208 @returns false if success, true if error
5209
5210 @note If something fails during initialization, JOIN::cleanup()
5211 will free anything that has been partially allocated and set up.
5212 Arrays are created in the execution mem_root, so they will be
5213 deleted automatically when the mem_root is re-initialized.
5214 */
5215
init_planner_arrays()5216 bool JOIN::init_planner_arrays()
5217 {
5218 // Up to one extra slot per semi-join nest is needed (if materialized)
5219 const uint sj_nests= select_lex->sj_nests.elements;
5220 const uint table_count= select_lex->leaf_table_count;
5221
5222 assert(primary_tables == 0 && tables == 0);
5223
5224 if (!(join_tab= alloc_jtab_array(thd, table_count)))
5225 return true;
5226
5227 /*
5228 We add 2 cells:
5229 - because planning stage uses 0-termination so needs +1
5230 - because after get_best_combination, we don't use 0-termination but
5231 need +2, to host at most 2 tmp sort/group/distinct tables.
5232 */
5233 if (!(best_ref= (JOIN_TAB **) thd->alloc(sizeof(JOIN_TAB *) *
5234 (table_count + sj_nests + 2))))
5235 return true;
5236
5237 // sort/group tmp tables have no map
5238 if (!(map2table= (JOIN_TAB **) thd->alloc(sizeof(JOIN_TAB *) *
5239 (table_count + sj_nests))))
5240 return true;
5241
5242 if (!(positions= new (thd->mem_root) POSITION[table_count]))
5243 return true;
5244
5245 if (!(best_positions= new (thd->mem_root) POSITION[table_count+sj_nests]))
5246 return true;
5247
5248 /*
5249 Initialize data structures for tables to be joined.
5250 Initialize dependencies between tables.
5251 */
5252 JOIN_TAB **best_ref_p= best_ref;
5253 TABLE_LIST *tl= select_lex->leaf_tables;
5254
5255 for (JOIN_TAB *tab= join_tab;
5256 tl;
5257 tab++, tl= tl->next_leaf, best_ref_p++)
5258 {
5259 *best_ref_p= tab;
5260 TABLE *const table= tl->table;
5261 tab->table_ref= tl;
5262 tab->set_table(table);
5263 const int err= tl->fetch_number_of_rows();
5264
5265 // Initialize the cost model for the table
5266 table->init_cost_model(cost_model());
5267
5268 DBUG_EXECUTE_IF("bug11747970_raise_error",
5269 {
5270 if (!err)
5271 {
5272 my_error(ER_UNKNOWN_ERROR, MYF(0));
5273 return true;
5274 }
5275 });
5276
5277 if (err)
5278 {
5279 table->file->print_error(err, MYF(0));
5280 return true;
5281 }
5282 table->quick_keys.clear_all();
5283 table->possible_quick_keys.clear_all();
5284 table->reginfo.not_exists_optimize= false;
5285 memset(table->const_key_parts, 0, sizeof(key_part_map)*table->s->keys);
5286 all_table_map|= tl->map();
5287 tab->set_join(this);
5288
5289 tab->dependent= tl->dep_tables; // Initialize table dependencies
5290 if (tl->schema_table)
5291 table->file->stats.records= 2;
5292 table->quick_condition_rows= table->file->stats.records;
5293
5294 tab->init_join_cond_ref(tl);
5295
5296 if (tl->outer_join_nest())
5297 {
5298 // tab belongs to a nested join, maybe to several embedding joins
5299 tab->embedding_map= 0;
5300 for (TABLE_LIST *embedding= tl->embedding;
5301 embedding;
5302 embedding= embedding->embedding)
5303 {
5304 NESTED_JOIN *const nested_join= embedding->nested_join;
5305 tab->embedding_map|= nested_join->nj_map;
5306 tab->dependent|= embedding->dep_tables;
5307 }
5308 }
5309 else if (tab->join_cond())
5310 {
5311 // tab is the only inner table of an outer join
5312 tab->embedding_map= 0;
5313 for (TABLE_LIST *embedding= tl->embedding;
5314 embedding;
5315 embedding= embedding->embedding)
5316 tab->embedding_map|= embedding->nested_join->nj_map;
5317 }
5318 tables++; // Count number of initialized tables
5319 }
5320
5321 primary_tables= tables;
5322 *best_ref_p= NULL; // Last element of array must be NULL
5323
5324 return false;
5325 }
5326
5327
5328 /**
5329 Propagate dependencies between tables due to outer join relations.
5330
5331 @returns false if success, true if error
5332
5333 Build transitive closure for relation 'to be dependent on'.
5334 This will speed up the plan search for many cases with outer joins,
5335 as well as allow us to catch illegal cross references.
5336 Warshall's algorithm is used to build the transitive closure.
5337 As we may restart the outer loop upto 'table_count' times, the
5338 complexity of the algorithm is O((number of tables)^3).
5339 However, most of the iterations will be shortcircuited when
5340 there are no dependencies to propagate.
5341 */
5342
propagate_dependencies()5343 bool JOIN::propagate_dependencies()
5344 {
5345 for (uint i= 0; i < tables; i++)
5346 {
5347 if (!join_tab[i].dependent)
5348 continue;
5349
5350 // Add my dependencies to other tables depending on me
5351 uint j;
5352 JOIN_TAB *tab;
5353 for (j= 0, tab= join_tab; j < tables; j++, tab++)
5354 {
5355 if (tab->dependent & join_tab[i].table_ref->map())
5356 {
5357 const table_map was_dependent= tab->dependent;
5358 tab->dependent|= join_tab[i].dependent;
5359 /*
5360 If we change dependencies for a table we already have
5361 processed: Redo dependency propagation from this table.
5362 */
5363 if (i > j && tab->dependent != was_dependent)
5364 {
5365 i= j-1;
5366 break;
5367 }
5368 }
5369 }
5370 }
5371
5372 JOIN_TAB *const tab_end= join_tab + tables;
5373 for (JOIN_TAB *tab= join_tab; tab < tab_end; tab++)
5374 {
5375 /*
5376 Catch illegal cross references for outer joins.
5377 This could happen before WL#2486 was implemented in 5.0, but should no
5378 longer be possible.
5379 Thus, an assert has been added should this happen again.
5380 @todo Remove the error check below.
5381 */
5382 assert(!(tab->dependent & tab->table_ref->map()));
5383
5384 if (tab->dependent & tab->table_ref->map())
5385 {
5386 tables= 0; // Don't use join->table
5387 primary_tables= 0;
5388 my_message(ER_WRONG_OUTER_JOIN, ER(ER_WRONG_OUTER_JOIN), MYF(0));
5389 return true;
5390 }
5391
5392 tab->key_dependent= tab->dependent;
5393 }
5394
5395 return false;
5396 }
5397
5398
5399 /**
5400 Extract const tables based on row counts.
5401
5402 @returns false if success, true if error
5403
5404 This extraction must be done for each execution.
5405 Tables containing exactly zero or one rows are marked as const, but
5406 notice the additional constraints checked below.
5407 Tables that are extracted have their rows read before actual execution
5408 starts and are placed in the beginning of the join_tab array.
5409 Thus, they do not take part in join order optimization process,
5410 which can significantly reduce the optimization time.
5411 The data read from these tables can also be regarded as "constant"
5412 throughout query execution, hence the column values can be used for
5413 additional constant propagation and extraction of const tables based
5414 on eq-ref properties.
5415
5416 The tables are given the type JT_SYSTEM.
5417 */
5418
extract_const_tables()5419 bool JOIN::extract_const_tables()
5420 {
5421 enum enum_const_table_extraction
5422 {
5423 extract_no_table= 0,
5424 extract_empty_table= 1,
5425 extract_const_table= 2
5426 };
5427
5428 JOIN_TAB *const tab_end= join_tab + tables;
5429 for (JOIN_TAB *tab= join_tab; tab < tab_end; tab++)
5430 {
5431 TABLE *const table= tab->table();
5432 TABLE_LIST *const tl= tab->table_ref;
5433 enum enum_const_table_extraction extract_method= extract_const_table;
5434
5435 const bool all_partitions_pruned_away= table->all_partitions_pruned_away;
5436
5437 if (tl->outer_join_nest())
5438 {
5439 /*
5440 Table belongs to a nested join, no candidate for const table extraction.
5441 */
5442 extract_method= extract_no_table;
5443 }
5444 else if (tl->embedding && tl->embedding->sj_cond())
5445 {
5446 /*
5447 Table belongs to a semi-join.
5448 We do not currently pull out const tables from semi-join nests.
5449 */
5450 extract_method= extract_no_table;
5451 }
5452 else if (tab->join_cond())
5453 {
5454 // tab is the only inner table of an outer join, extract empty tables
5455 extract_method= extract_empty_table;
5456 }
5457 switch (extract_method)
5458 {
5459 case extract_no_table:
5460 break;
5461
5462 case extract_empty_table:
5463 // Extract tables with zero rows, but only if statistics are exact
5464 if ((table->file->stats.records == 0 ||
5465 all_partitions_pruned_away) &&
5466 (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT))
5467 mark_const_table(tab, NULL);
5468 break;
5469
5470 case extract_const_table:
5471 /*
5472 Extract tables with zero or one rows, but do not extract tables that
5473 1. are dependent upon other tables, or
5474 2. have no exact statistics, or
5475 3. are full-text searched
5476 */
5477 if ((table->s->system ||
5478 table->file->stats.records <= 1 ||
5479 all_partitions_pruned_away) &&
5480 !tab->dependent && // 1
5481 (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) && // 2
5482 !table->fulltext_searched) // 3
5483 mark_const_table(tab, NULL);
5484 break;
5485 }
5486 }
5487
5488 // Read const tables (tables matching no more than 1 rows)
5489 if (!const_tables)
5490 return false;
5491
5492 for (POSITION *p_pos= positions, *p_end= p_pos + const_tables;
5493 p_pos < p_end;
5494 p_pos++)
5495 {
5496 JOIN_TAB *const tab= p_pos->table;
5497 const int status= join_read_const_table(tab, p_pos);
5498 if (status > 0)
5499 return true;
5500 else if (status == 0)
5501 {
5502 found_const_table_map|= tab->table_ref->map();
5503 tab->table_ref->optimized_away= true;
5504 }
5505 }
5506
5507 return false;
5508 }
5509
5510 /**
5511 Extract const tables based on functional dependencies.
5512
5513 @returns false if success, true if error
5514
5515 This extraction must be done for each execution.
5516
5517 Mark as const the tables that
5518 - are functionally dependent on constant values, or
5519 - are inner tables of an outer join and contain exactly zero or one rows
5520
5521 Tables that are extracted have their rows read before actual execution
5522 starts and are placed in the beginning of the join_tab array, just as
5523 described for JOIN::extract_const_tables().
5524
5525 The tables are given the type JT_CONST.
5526 */
5527
extract_func_dependent_tables()5528 bool JOIN::extract_func_dependent_tables()
5529 {
5530 // loop until no more const tables are found
5531 bool ref_changed;
5532 table_map found_ref;
5533 do
5534 {
5535 more_const_tables_found:
5536 ref_changed = false;
5537 found_ref= 0;
5538
5539 // Loop over all tables that are not already determined to be const
5540 for (JOIN_TAB **pos= best_ref + const_tables; *pos; pos++)
5541 {
5542 JOIN_TAB *const tab= *pos;
5543 TABLE *const table= tab->table();
5544 TABLE_LIST *const tl= tab->table_ref;
5545 /*
5546 If equi-join condition by a key is null rejecting and after a
5547 substitution of a const table the key value happens to be null
5548 then we can state that there are no matches for this equi-join.
5549 */
5550 Key_use *keyuse= tab->keyuse();
5551 if (keyuse && tab->join_cond() && !tab->embedding_map)
5552 {
5553 /*
5554 When performing an outer join operation if there are no matching rows
5555 for the single row of the outer table all the inner tables are to be
5556 null complemented and thus considered as constant tables.
5557 Here we apply this consideration to the case of outer join operations
5558 with a single inner table only because the case with nested tables
5559 would require a more thorough analysis.
5560 TODO. Apply single row substitution to null complemented inner tables
5561 for nested outer join operations.
5562 */
5563 while (keyuse->table_ref == tl)
5564 {
5565 if (!(keyuse->val->used_tables() & ~const_table_map) &&
5566 keyuse->val->is_null() && keyuse->null_rejecting)
5567 {
5568 table->set_null_row();
5569 found_const_table_map|= tl->map();
5570 mark_const_table(tab, keyuse);
5571 goto more_const_tables_found;
5572 }
5573 keyuse++;
5574 }
5575 }
5576
5577 if (tab->dependent) // If dependent on some table
5578 {
5579 // All dependent tables must be const
5580 if (tab->dependent & ~const_table_map)
5581 continue;
5582 /*
5583 Mark a dependent table as constant if
5584 1. it has exactly zero or one rows (it is a system table), and
5585 2. it is not within a nested outer join, and
5586 3. it does not have an expensive outer join condition.
5587 This is because we have to determine whether an outer-joined table
5588 has a real row or a null-extended row in the optimizer phase.
5589 We have no possibility to evaluate its join condition at
5590 execution time, when it is marked as a system table.
5591 */
5592 if (table->file->stats.records <= 1L && // 1
5593 (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) && // 1
5594 !tl->outer_join_nest() && // 2
5595 !(tab->join_cond() && tab->join_cond()->is_expensive())) // 3
5596 { // system table
5597 mark_const_table(tab, NULL);
5598 const int status=
5599 join_read_const_table(tab, positions + const_tables - 1);
5600 if (status > 0)
5601 return true;
5602 else if (status == 0)
5603 found_const_table_map|= tl->map();
5604 continue;
5605 }
5606 }
5607
5608 // Check if table can be read by key or table only uses const refs
5609
5610 if ((keyuse= tab->keyuse()))
5611 {
5612 while (keyuse->table_ref == tl)
5613 {
5614 Key_use *const start_keyuse= keyuse;
5615 const uint key= keyuse->key;
5616 tab->keys().set_bit(key); // QQ: remove this ?
5617
5618 table_map refs= 0;
5619 key_map const_ref, eq_part;
5620 do
5621 {
5622 if (keyuse->val->type() != Item::NULL_ITEM && !keyuse->optimize)
5623 {
5624 if (!((~found_const_table_map) & keyuse->used_tables))
5625 const_ref.set_bit(keyuse->keypart);
5626 else
5627 refs|= keyuse->used_tables;
5628 eq_part.set_bit(keyuse->keypart);
5629 }
5630 keyuse++;
5631 } while (keyuse->table_ref == tl && keyuse->key == key);
5632
5633 /*
5634 Extract const tables with proper key dependencies.
5635 Exclude tables that
5636 1. are full-text searched, or
5637 2. are part of nested outer join, or
5638 3. are part of semi-join, or
5639 4. have an expensive outer join condition.
5640 5. are blocked by handler for const table optimize.
5641 */
5642 if (eq_part.is_prefix(table->key_info[key].user_defined_key_parts) &&
5643 !table->fulltext_searched && // 1
5644 !tl->outer_join_nest() && // 2
5645 !(tl->embedding && tl->embedding->sj_cond()) && // 3
5646 !(tab->join_cond() && tab->join_cond()->is_expensive()) &&// 4
5647 !(table->file->ha_table_flags() & HA_BLOCK_CONST_TABLE)) // 5
5648 {
5649 if (table->key_info[key].flags & HA_NOSAME)
5650 {
5651 if (const_ref == eq_part)
5652 { // Found everything for ref.
5653 ref_changed = true;
5654 mark_const_table(tab, start_keyuse);
5655 if (create_ref_for_key(this, tab, start_keyuse,
5656 found_const_table_map))
5657 return true;
5658 const int status=
5659 join_read_const_table(tab, positions + const_tables - 1);
5660 if (status > 0)
5661 return true;
5662 else if (status == 0)
5663 found_const_table_map|= tl->map();
5664 break;
5665 }
5666 else
5667 found_ref|= refs; // Table is const if all refs are const
5668 }
5669 else if (const_ref == eq_part)
5670 tab->const_keys.set_bit(key);
5671 }
5672 }
5673 }
5674 }
5675 } while ((const_table_map & found_ref) && ref_changed);
5676
5677 return false;
5678 }
5679
5680 /**
5681 Update info on indexes that can be used for search lookups as
5682 reading const tables may has added new sargable predicates.
5683 */
5684
update_sargable_from_const(SARGABLE_PARAM * sargables)5685 void JOIN::update_sargable_from_const(SARGABLE_PARAM *sargables)
5686 {
5687 for ( ; sargables->field; sargables++)
5688 {
5689 Field *const field= sargables->field;
5690 JOIN_TAB *const tab= field->table->reginfo.join_tab;
5691 key_map possible_keys= field->key_start;
5692 possible_keys.intersect(field->table->keys_in_use_for_query);
5693 bool is_const= true;
5694 for (uint j= 0; j < sargables->num_values; j++)
5695 is_const&= sargables->arg_value[j]->const_item();
5696 if (is_const)
5697 {
5698 tab->const_keys.merge(possible_keys);
5699 tab->keys().merge(possible_keys);
5700 }
5701 }
5702 }
5703
5704
5705 /**
5706 Estimate the number of matched rows for each joined table.
5707 Set up range scan for tables that have proper predicates.
5708
5709 @returns false if success, true if error
5710 */
5711
estimate_rowcount()5712 bool JOIN::estimate_rowcount()
5713 {
5714 Opt_trace_context *const trace= &thd->opt_trace;
5715 Opt_trace_object trace_wrapper(trace);
5716 Opt_trace_array trace_records(trace, "rows_estimation");
5717
5718 JOIN_TAB *const tab_end= join_tab + tables;
5719 for (JOIN_TAB *tab= join_tab; tab < tab_end; tab++)
5720 {
5721 const Cost_model_table *const cost_model= tab->table()->cost_model();
5722 Opt_trace_object trace_table(trace);
5723 trace_table.add_utf8_table(tab->table_ref);
5724 if (tab->type() == JT_SYSTEM || tab->type() == JT_CONST)
5725 {
5726 trace_table.add("rows", 1).add("cost", 1)
5727 .add_alnum("table_type", (tab->type() == JT_SYSTEM) ? "system": "const")
5728 .add("empty", tab->table()->has_null_row());
5729
5730 // Only one matching row and one block to read
5731 tab->set_records(tab->found_records= 1);
5732 tab->worst_seeks= cost_model->page_read_cost(1.0);
5733 tab->read_time= static_cast<ha_rows>(tab->worst_seeks);
5734 continue;
5735 }
5736 // Approximate number of found rows and cost to read them
5737 tab->set_records(tab->found_records= tab->table()->file->stats.records);
5738 const Cost_estimate table_scan_time= tab->table()->file->table_scan_cost();
5739 tab->read_time= static_cast<ha_rows>(table_scan_time.total_cost());
5740
5741 /*
5742 Set a max value for the cost of seek operations we can expect
5743 when using key lookup. This can't be too high as otherwise we
5744 are likely to use table scan.
5745 */
5746 tab->worst_seeks=
5747 min(cost_model->page_read_cost((double) tab->found_records / 10),
5748 (double) tab->read_time * 3);
5749 const double min_worst_seek= cost_model->page_read_cost(2.0);
5750 if (tab->worst_seeks < min_worst_seek) // Fix for small tables
5751 tab->worst_seeks= min_worst_seek;
5752
5753 /*
5754 Add to tab->const_keys those indexes for which all group fields or
5755 all select distinct fields participate in one index.
5756 */
5757 add_group_and_distinct_keys(this, tab);
5758
5759 /*
5760 Perform range analysis if there are keys it could use (1).
5761 Don't do range analysis if on the inner side of an outer join (2).
5762 Do range analysis if on the inner side of a semi-join (3).
5763 */
5764 TABLE_LIST *const tl= tab->table_ref;
5765 if (!tab->const_keys.is_clear_all() && // (1)
5766 (!tl->embedding || // (2)
5767 (tl->embedding && tl->embedding->sj_cond()))) // (3)
5768 {
5769 /*
5770 This call fills tab->quick() with the best QUICK access method
5771 possible for this table, and only if it's better than table scan.
5772 It also fills tab->needed_reg.
5773 */
5774 ha_rows records= get_quick_record_count(thd, tab, row_limit);
5775
5776 if (records == 0 && thd->is_error())
5777 return true;
5778
5779 /*
5780 Check for "impossible range", but make sure that we do not attempt
5781 to mark semi-joined tables as "const" (only semi-joined tables that
5782 are functionally dependent can be marked "const", and subsequently
5783 pulled out of their semi-join nests).
5784 */
5785 if (records == 0 &&
5786 tab->table()->reginfo.impossible_range &&
5787 (!(tl->embedding && tl->embedding->sj_cond())))
5788 {
5789 /*
5790 Impossible WHERE condition or join condition
5791 In case of join cond, mark that one empty NULL row is matched.
5792 In case of WHERE, don't set found_const_table_map to get the
5793 caller to abort with a zero row result.
5794 */
5795 mark_const_table(tab, NULL);
5796 tab->set_type(JT_CONST); // Override setting made in mark_const_table()
5797 if (tab->join_cond())
5798 {
5799 // Generate an empty row
5800 trace_table.add("returning_empty_null_row", true).
5801 add_alnum("cause", "impossible_on_condition");
5802 found_const_table_map|= tl->map();
5803 tab->table()->set_null_row(); // All fields are NULL
5804 }
5805 else
5806 {
5807 trace_table.add("rows", 0).
5808 add_alnum("cause", "impossible_where_condition");
5809 }
5810 }
5811 if (records != HA_POS_ERROR)
5812 {
5813 tab->found_records= records;
5814 tab->read_time= (ha_rows) (tab->quick() ?
5815 tab->quick()->cost_est.total_cost() : 0.0);
5816 }
5817 }
5818 else
5819 {
5820 Opt_trace_object(trace, "table_scan").
5821 add("rows", tab->found_records).
5822 add("cost", tab->read_time);
5823 }
5824 }
5825
5826 return false;
5827 }
5828
5829
5830 /**
5831 Set semi-join embedding join nest pointers.
5832
5833 Set pointer to embedding semi-join nest for all semi-joined tables.
5834 Note that this must be done for every table inside all semi-join nests,
5835 even for tables within outer join nests embedded in semi-join nests.
5836 A table can never be part of multiple semi-join nests, hence no
5837 ambiguities can ever occur.
5838 Note also that the pointer is not set for TABLE_LIST objects that
5839 are outer join nests within semi-join nests.
5840 */
5841
set_semijoin_embedding()5842 void JOIN::set_semijoin_embedding()
5843 {
5844 assert(!select_lex->sj_nests.is_empty());
5845
5846 JOIN_TAB *const tab_end= join_tab + primary_tables;
5847
5848 for (JOIN_TAB *tab= join_tab; tab < tab_end; tab++)
5849 {
5850 for (TABLE_LIST *tl= tab->table_ref; tl->embedding; tl= tl->embedding)
5851 {
5852 if (tl->embedding->sj_cond())
5853 {
5854 tab->emb_sj_nest= tl->embedding;
5855 break;
5856 }
5857 }
5858 }
5859 }
5860
5861
5862 /**
5863 @brief Check if semijoin's compared types allow materialization.
5864
5865 @param[inout] sj_nest Semi-join nest containing information about correlated
5866 expressions. Set nested_join->sjm.scan_allowed to TRUE if
5867 MaterializeScan strategy allowed. Set nested_join->sjm.lookup_allowed
5868 to TRUE if MaterializeLookup strategy allowed
5869
5870 @details
5871 This is a temporary fix for BUG#36752.
5872
5873 There are two subquery materialization strategies for semijoin:
5874
5875 1. Materialize and do index lookups in the materialized table. See
5876 BUG#36752 for description of restrictions we need to put on the
5877 compared expressions.
5878
5879 In addition, since indexes are not supported for BLOB columns,
5880 this strategy can not be used if any of the columns in the
5881 materialized table will be BLOB/GEOMETRY columns. (Note that
5882 also columns for non-BLOB values that may be greater in size
5883 than CONVERT_IF_BIGGER_TO_BLOB, will be represented as BLOB
5884 columns.)
5885
5886 2. Materialize and then do a full scan of the materialized table.
5887 The same criteria as for MaterializeLookup are applied, except that
5888 BLOB/GEOMETRY columns are allowed.
5889 */
5890
5891 static
semijoin_types_allow_materialization(TABLE_LIST * sj_nest)5892 void semijoin_types_allow_materialization(TABLE_LIST *sj_nest)
5893 {
5894 DBUG_ENTER("semijoin_types_allow_materialization");
5895
5896 assert(sj_nest->nested_join->sj_outer_exprs.elements ==
5897 sj_nest->nested_join->sj_inner_exprs.elements);
5898
5899 if (sj_nest->nested_join->sj_outer_exprs.elements > MAX_REF_PARTS)
5900 {
5901 sj_nest->nested_join->sjm.scan_allowed= false;
5902 sj_nest->nested_join->sjm.lookup_allowed= false;
5903 DBUG_VOID_RETURN;
5904 }
5905
5906 List_iterator<Item> it1(sj_nest->nested_join->sj_outer_exprs);
5907 List_iterator<Item> it2(sj_nest->nested_join->sj_inner_exprs);
5908
5909 sj_nest->nested_join->sjm.scan_allowed= true;
5910 sj_nest->nested_join->sjm.lookup_allowed= true;
5911
5912 bool blobs_involved= false;
5913 Item *outer, *inner;
5914 uint total_lookup_index_length= 0;
5915 uint max_key_length;
5916 uint max_key_part_length;
5917 /*
5918 Maximum lengths for keys and key parts that are supported by
5919 the temporary table storage engine(s).
5920 */
5921 get_max_key_and_part_length(&max_key_length,
5922 &max_key_part_length);
5923 while (outer= it1++, inner= it2++)
5924 {
5925 assert(outer->real_item() && inner->real_item());
5926 if (!types_allow_materialization(outer, inner))
5927 {
5928 sj_nest->nested_join->sjm.scan_allowed= false;
5929 sj_nest->nested_join->sjm.lookup_allowed= false;
5930 DBUG_VOID_RETURN;
5931 }
5932 blobs_involved|= inner->is_blob_field();
5933
5934 // Calculate the index length of materialized table
5935 const uint lookup_index_length= get_key_length_tmp_table(inner);
5936 if (lookup_index_length > max_key_part_length)
5937 sj_nest->nested_join->sjm.lookup_allowed= false;
5938 total_lookup_index_length+= lookup_index_length ;
5939 }
5940 if (total_lookup_index_length > max_key_length)
5941 sj_nest->nested_join->sjm.lookup_allowed= false;
5942
5943 if (blobs_involved)
5944 sj_nest->nested_join->sjm.lookup_allowed= false;
5945
5946 if (sj_nest->embedding)
5947 {
5948 assert(sj_nest->embedding->join_cond_optim());
5949 /*
5950 There are two issues that prevent materialization strategy from being
5951 used when a semi-join nest is on the inner side of an outer join:
5952 1. If the semi-join contains dependencies to outer tables,
5953 materialize-scan strategy cannot be used.
5954 2. Make sure that executor is able to evaluate triggered conditions
5955 for semi-join materialized tables. It should be correct, but needs
5956 verification.
5957 TODO: Remove this limitation!
5958 Handle this by disabling materialization strategies:
5959 */
5960 sj_nest->nested_join->sjm.scan_allowed= false;
5961 sj_nest->nested_join->sjm.lookup_allowed= false;
5962 DBUG_VOID_RETURN;
5963 }
5964
5965 DBUG_PRINT("info",("semijoin_types_allow_materialization: ok, allowed"));
5966
5967 DBUG_VOID_RETURN;
5968 }
5969
5970
5971 /*****************************************************************************
5972 Create JOIN_TABS, make a guess about the table types,
5973 Approximate how many records will be used in each table
5974 *****************************************************************************/
5975
5976 /**
5977 Returns estimated number of rows that could be fetched by given
5978 access method.
5979
5980 The function calls the range optimizer to estimate the cost of the
5981 cheapest QUICK_* index access method to scan one or several of the
5982 'keys' using the conditions 'select->cond'. The range optimizer
5983 compares several different types of 'quick select' methods (range
5984 scan, index merge, loose index scan) and selects the cheapest one.
5985
5986 If the best index access method is cheaper than a table- and an index
5987 scan, then the range optimizer also constructs the corresponding
5988 QUICK_* object and assigns it to select->quick. In most cases this
5989 is the QUICK_* object used at later (optimization and execution)
5990 phases.
5991
5992 @param thd Session that runs the query.
5993 @param tab JOIN_TAB of source table.
5994 @param limit maximum number of rows to select.
5995
5996 @note
5997 In case of valid range, a QUICK_SELECT_I object will be constructed and
5998 saved in select->quick.
5999
6000 @return Estimated number of result rows selected from 'tab'.
6001
6002 @retval HA_POS_ERROR For derived tables/views or if an error occur.
6003 @retval 0 If impossible query (i.e. certainly no rows will be
6004 selected.)
6005 */
get_quick_record_count(THD * thd,JOIN_TAB * tab,ha_rows limit)6006 static ha_rows get_quick_record_count(THD *thd, JOIN_TAB *tab, ha_rows limit)
6007 {
6008 DBUG_ENTER("get_quick_record_count");
6009 uchar buff[STACK_BUFF_ALLOC];
6010 if (check_stack_overrun(thd, STACK_MIN_SIZE, buff))
6011 DBUG_RETURN(0); // Fatal error flag is set
6012
6013 TABLE_LIST *const tl= tab->table_ref;
6014
6015 // Derived tables aren't filled yet, so no stats are available.
6016 if (!tl->uses_materialization())
6017 {
6018 QUICK_SELECT_I *qck;
6019 int error= test_quick_select(thd,
6020 tab->const_keys,
6021 0, //empty table_map
6022 limit,
6023 false, //don't force quick range
6024 ORDER::ORDER_NOT_RELEVANT, tab,
6025 tab->join_cond() ? tab->join_cond() :
6026 tab->join()->where_cond,
6027 &tab->needed_reg, &qck, tab->table()->force_index);
6028 tab->set_quick(qck);
6029
6030 if (error == 1)
6031 DBUG_RETURN(qck->records);
6032 if (error == -1)
6033 {
6034 tl->table->reginfo.impossible_range=1;
6035 DBUG_RETURN(0);
6036 }
6037 DBUG_PRINT("warning",("Couldn't use record count on const keypart"));
6038 }
6039 else if (tl->materializable_is_const())
6040 {
6041 DBUG_RETURN(tl->derived_unit()->query_result()->estimated_rowcount);
6042 }
6043 DBUG_RETURN(HA_POS_ERROR);
6044 }
6045
6046 /*
6047 Get estimated record length for semi-join materialization temptable
6048
6049 SYNOPSIS
6050 get_tmp_table_rec_length()
6051 items IN subquery's select list.
6052
6053 DESCRIPTION
6054 Calculate estimated record length for semi-join materialization
6055 temptable. It's an estimate because we don't follow every bit of
6056 create_tmp_table()'s logic. This isn't necessary as the return value of
6057 this function is used only for cost calculations.
6058
6059 RETURN
6060 Length of the temptable record, in bytes
6061 */
6062
get_tmp_table_rec_length(List<Item> & items)6063 static uint get_tmp_table_rec_length(List<Item> &items)
6064 {
6065 uint len= 0;
6066 Item *item;
6067 List_iterator<Item> it(items);
6068 while ((item= it++))
6069 {
6070 switch (item->result_type()) {
6071 case REAL_RESULT:
6072 len += sizeof(double);
6073 break;
6074 case INT_RESULT:
6075 if (item->max_length >= (MY_INT32_NUM_DECIMAL_DIGITS - 1))
6076 len += 8;
6077 else
6078 len += 4;
6079 break;
6080 case STRING_RESULT:
6081 /* DATE/TIME and GEOMETRY fields have STRING_RESULT result type. */
6082 if (item->is_temporal() || item->field_type() == MYSQL_TYPE_GEOMETRY)
6083 len += 8;
6084 else
6085 len += item->max_length;
6086 break;
6087 case DECIMAL_RESULT:
6088 len += 10;
6089 break;
6090 case ROW_RESULT:
6091 default:
6092 assert(0); /* purecov: deadcode */
6093 break;
6094 }
6095 }
6096 return len;
6097 }
6098
6099
6100 /**
6101 Writes to the optimizer trace information about dependencies between
6102 tables.
6103 @param trace optimizer trace
6104 @param join_tabs all JOIN_TABs of the join
6105 @param table_count how many JOIN_TABs in the 'join_tabs' array
6106 */
trace_table_dependencies(Opt_trace_context * trace,JOIN_TAB * join_tabs,uint table_count)6107 static void trace_table_dependencies(Opt_trace_context * trace,
6108 JOIN_TAB *join_tabs,
6109 uint table_count)
6110 {
6111 Opt_trace_object trace_wrapper(trace);
6112 Opt_trace_array trace_dep(trace, "table_dependencies");
6113 for (uint i= 0 ; i < table_count ; i++)
6114 {
6115 TABLE_LIST *table_ref= join_tabs[i].table_ref;
6116 Opt_trace_object trace_one_table(trace);
6117 trace_one_table.add_utf8_table(table_ref).
6118 add("row_may_be_null", table_ref->table->is_nullable());
6119 const table_map map= table_ref->map();
6120 assert(map < (1ULL << table_count));
6121 for (uint j= 0; j < table_count; j++)
6122 {
6123 if (map & (1ULL << j))
6124 {
6125 trace_one_table.add("map_bit", j);
6126 break;
6127 }
6128 }
6129 Opt_trace_array depends_on(trace, "depends_on_map_bits");
6130 // RAND_TABLE_BIT may be in join_tabs[i].dependent, so we test all 64 bits
6131 compile_time_assert(sizeof(table_ref->map()) <= 64);
6132 for (uint j= 0; j < 64; j++)
6133 {
6134 if (join_tabs[i].dependent & (1ULL << j))
6135 depends_on.add(j);
6136 }
6137 }
6138 }
6139
6140
6141 /**
6142 Add to join_tab[i]->condition() "table.field IS NOT NULL" conditions
6143 we've inferred from ref/eq_ref access performed.
6144
6145 This function is a part of "Early NULL-values filtering for ref access"
6146 optimization.
6147
6148 Example of this optimization:
6149 For query SELECT * FROM t1,t2 WHERE t2.key=t1.field @n
6150 and plan " any-access(t1), ref(t2.key=t1.field) " @n
6151 add "t1.field IS NOT NULL" to t1's table condition. @n
6152
6153 Description of the optimization:
6154
6155 We look through equalities choosen to perform ref/eq_ref access,
6156 pick equalities that have form "tbl.part_of_key = othertbl.field"
6157 (where othertbl is a non-const table and othertbl.field may be NULL)
6158 and add them to conditions on correspoding tables (othertbl in this
6159 example).
6160
6161 Exception from that is the case when referred_tab->join != join.
6162 I.e. don't add NOT NULL constraints from any embedded subquery.
6163 Consider this query:
6164 @code
6165 SELECT A.f2 FROM t1 LEFT JOIN t2 A ON A.f2 = f1
6166 WHERE A.f3=(SELECT MIN(f3) FROM t2 C WHERE A.f4 = C.f4) OR A.f3 IS NULL;
6167 @endcode
6168 Here condition A.f3 IS NOT NULL is going to be added to the WHERE
6169 condition of the embedding query.
6170 Another example:
6171 SELECT * FROM t10, t11 WHERE (t10.a < 10 OR t10.a IS NULL)
6172 AND t11.b <=> t10.b AND (t11.a = (SELECT MAX(a) FROM t12
6173 WHERE t12.b = t10.a ));
6174 Here condition t10.a IS NOT NULL is going to be added.
6175 In both cases addition of NOT NULL condition will erroneously reject
6176 some rows of the result set.
6177 referred_tab->join != join constraint would disallow such additions.
6178
6179 This optimization doesn't affect the choices that ref, range, or join
6180 optimizer make. This was intentional because this was added after 4.1
6181 was GA.
6182
6183 Implementation overview
6184 1. update_ref_and_keys() accumulates info about null-rejecting
6185 predicates in in Key_field::null_rejecting
6186 1.1 add_key_part saves these to Key_use.
6187 2. create_ref_for_key copies them to TABLE_REF.
6188 3. add_not_null_conds adds "x IS NOT NULL" to join_tab->m_condition of
6189 appropiate JOIN_TAB members.
6190 */
6191
add_not_null_conds(JOIN * join)6192 static void add_not_null_conds(JOIN *join)
6193 {
6194 DBUG_ENTER("add_not_null_conds");
6195 ASSERT_BEST_REF_IN_JOIN_ORDER(join);
6196 for (uint i=join->const_tables ; i < join->tables ; i++)
6197 {
6198 JOIN_TAB *const tab= join->best_ref[i];
6199 if ((tab->type() == JT_REF || tab->type() == JT_EQ_REF ||
6200 tab->type() == JT_REF_OR_NULL) &&
6201 !tab->table()->is_nullable())
6202 {
6203 for (uint keypart= 0; keypart < tab->ref().key_parts; keypart++)
6204 {
6205 if (tab->ref().null_rejecting & ((key_part_map)1 << keypart))
6206 {
6207 Item *item= tab->ref().items[keypart];
6208 Item *notnull;
6209 Item *real= item->real_item();
6210 assert(real->type() == Item::FIELD_ITEM);
6211 Item_field *not_null_item= (Item_field*)real;
6212 JOIN_TAB *referred_tab= not_null_item->field->table->reginfo.join_tab;
6213 /*
6214 For UPDATE queries such as:
6215 UPDATE t1 SET t1.f2=(SELECT MAX(t2.f4) FROM t2 WHERE t2.f3=t1.f1);
6216 not_null_item is the t1.f1, but it's referred_tab is 0.
6217 */
6218 if (!referred_tab || referred_tab->join() != join)
6219 continue;
6220 if (!(notnull= new Item_func_isnotnull(not_null_item)))
6221 DBUG_VOID_RETURN;
6222 /*
6223 We need to do full fix_fields() call here in order to have correct
6224 notnull->const_item(). This is needed e.g. by test_quick_select
6225 when it is called from make_join_select after this function is
6226 called.
6227 */
6228 if (notnull->fix_fields(join->thd, ¬null))
6229 DBUG_VOID_RETURN;
6230 DBUG_EXECUTE("where",print_where(notnull,
6231 referred_tab->table()->alias,
6232 QT_ORDINARY););
6233 referred_tab->and_with_condition(notnull);
6234 }
6235 }
6236 }
6237 }
6238 DBUG_VOID_RETURN;
6239 }
6240
6241
6242 /**
6243 Check if given expression only uses fields covered by index #keyno in the
6244 table tbl. The expression can use any fields in any other tables.
6245
6246 The expression is guaranteed not to be AND or OR - those constructs are
6247 handled outside of this function.
6248
6249 Restrict some function types from being pushed down to storage engine:
6250 a) Don't push down the triggered conditions. Nested outer joins execution
6251 code may need to evaluate a condition several times (both triggered and
6252 untriggered).
6253 b) Stored functions contain a statement that might start new operations (like
6254 DML statements) from within the storage engine. This does not work against
6255 all SEs.
6256 c) Subqueries might contain nested subqueries and involve more tables.
6257
6258 @param item Expression to check
6259 @param tbl The table having the index
6260 @param keyno The index number
6261 @param other_tbls_ok TRUE <=> Fields of other non-const tables are allowed
6262
6263 @return false if No, true if Yes
6264 */
6265
uses_index_fields_only(Item * item,TABLE * tbl,uint keyno,bool other_tbls_ok)6266 bool uses_index_fields_only(Item *item, TABLE *tbl, uint keyno,
6267 bool other_tbls_ok)
6268 {
6269 // Restrictions b and c.
6270 if (item->has_stored_program() || item->has_subquery())
6271 return false;
6272
6273 if (item->const_item())
6274 return true;
6275
6276 const Item::Type item_type= item->type();
6277
6278 switch (item_type) {
6279 case Item::FUNC_ITEM:
6280 {
6281 Item_func *item_func= (Item_func*)item;
6282 const Item_func::Functype func_type= item_func->functype();
6283
6284 /*
6285 Restriction a.
6286 TODO: Consider cloning the triggered condition and using the copies
6287 for:
6288 1. push the first copy down, to have most restrictive index condition
6289 possible.
6290 2. Put the second copy into tab->m_condition.
6291 */
6292 if (func_type == Item_func::TRIG_COND_FUNC)
6293 return false;
6294
6295 /* This is a function, apply condition recursively to arguments */
6296 if (item_func->argument_count() > 0)
6297 {
6298 Item **item_end= (item_func->arguments()) + item_func->argument_count();
6299 for (Item **child= item_func->arguments(); child != item_end; child++)
6300 {
6301 if (!uses_index_fields_only(*child, tbl, keyno, other_tbls_ok))
6302 return FALSE;
6303 }
6304 }
6305 return TRUE;
6306 }
6307 case Item::COND_ITEM:
6308 {
6309 /*
6310 This is a AND/OR condition. Regular AND/OR clauses are handled by
6311 make_cond_for_index() which will chop off the part that can be
6312 checked with index. This code is for handling non-top-level AND/ORs,
6313 e.g. func(x AND y).
6314 */
6315 List_iterator<Item> li(*((Item_cond*)item)->argument_list());
6316 Item *item;
6317 while ((item=li++))
6318 {
6319 if (!uses_index_fields_only(item, tbl, keyno, other_tbls_ok))
6320 return FALSE;
6321 }
6322 return TRUE;
6323 }
6324 case Item::FIELD_ITEM:
6325 {
6326 Item_field *item_field= (Item_field*)item;
6327 if (item_field->field->table != tbl)
6328 return other_tbls_ok;
6329 /*
6330 The below is probably a repetition - the first part checks the
6331 other two, but let's play it safe:
6332 */
6333 return item_field->field->part_of_key.is_set(keyno) &&
6334 item_field->field->type() != MYSQL_TYPE_GEOMETRY &&
6335 item_field->field->type() != MYSQL_TYPE_BLOB;
6336 }
6337 case Item::REF_ITEM:
6338 return uses_index_fields_only(item->real_item(), tbl, keyno,
6339 other_tbls_ok);
6340 default:
6341 return FALSE; /* Play it safe, don't push unknown non-const items */
6342 }
6343 }
6344
6345
6346 /**
6347 Optimize semi-join nests that could be run with sj-materialization
6348
6349 @param join The join to optimize semi-join nests for
6350
6351 @details
6352 Optimize each of the semi-join nests that can be run with
6353 materialization. For each of the nests, we
6354 - Generate the best join order for this "sub-join" and remember it;
6355 - Remember the sub-join execution cost (it's part of materialization
6356 cost);
6357 - Calculate other costs that will be incurred if we decide
6358 to use materialization strategy for this semi-join nest.
6359
6360 All obtained information is saved and will be used by the main join
6361 optimization pass.
6362
6363 @return false if successful, true if error
6364 */
6365
optimize_semijoin_nests_for_materialization(JOIN * join)6366 static bool optimize_semijoin_nests_for_materialization(JOIN *join)
6367 {
6368 DBUG_ENTER("optimize_semijoin_nests_for_materialization");
6369 List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests);
6370 TABLE_LIST *sj_nest;
6371 Opt_trace_context * const trace= &join->thd->opt_trace;
6372
6373 while ((sj_nest= sj_list_it++))
6374 {
6375 /* As a precaution, reset pointers that were used in prior execution */
6376 sj_nest->nested_join->sjm.positions= NULL;
6377
6378 /* Calculate the cost of materialization if materialization is allowed. */
6379 if (sj_nest->nested_join->sj_enabled_strategies &
6380 OPTIMIZER_SWITCH_MATERIALIZATION)
6381 {
6382 /* A semi-join nest should not contain tables marked as const */
6383 assert(!(sj_nest->sj_inner_tables & join->const_table_map));
6384
6385 Opt_trace_object trace_wrapper(trace);
6386 Opt_trace_object
6387 trace_sjmat(trace, "execution_plan_for_potential_materialization");
6388 Opt_trace_array trace_sjmat_steps(trace, "steps");
6389 /*
6390 Try semijoin materialization if the semijoin is classified as
6391 non-trivially-correlated.
6392 */
6393 if (sj_nest->nested_join->sj_corr_tables)
6394 continue;
6395 /*
6396 Check whether data types allow execution with materialization.
6397 */
6398 semijoin_types_allow_materialization(sj_nest);
6399
6400 if (!sj_nest->nested_join->sjm.scan_allowed &&
6401 !sj_nest->nested_join->sjm.lookup_allowed)
6402 continue;
6403
6404 if (Optimize_table_order(join->thd, join, sj_nest).choose_table_order())
6405 DBUG_RETURN(true);
6406 const uint n_tables= my_count_bits(sj_nest->sj_inner_tables);
6407 calculate_materialization_costs(join, sj_nest, n_tables,
6408 &sj_nest->nested_join->sjm);
6409 /*
6410 Cost data is in sj_nest->nested_join->sjm. We also need to save the
6411 plan:
6412 */
6413 if (!(sj_nest->nested_join->sjm.positions=
6414 (st_position*)join->thd->alloc(sizeof(st_position)*n_tables)))
6415 DBUG_RETURN(true);
6416 memcpy(sj_nest->nested_join->sjm.positions,
6417 join->best_positions + join->const_tables,
6418 sizeof(st_position) * n_tables);
6419 }
6420 }
6421 DBUG_RETURN(false);
6422 }
6423
6424
6425 /*
6426 Check if table's Key_use elements have an eq_ref(outer_tables) candidate
6427
6428 SYNOPSIS
6429 find_eq_ref_candidate()
6430 tl Table to be checked
6431 sj_inner_tables Bitmap of inner tables. eq_ref(inner_table) doesn't
6432 count.
6433
6434 DESCRIPTION
6435 Check if table's Key_use elements have an eq_ref(outer_tables) candidate
6436
6437 TODO
6438 Check again if it is feasible to factor common parts with constant table
6439 search
6440
6441 RETURN
6442 TRUE - There exists an eq_ref(outer-tables) candidate
6443 FALSE - Otherwise
6444 */
6445
find_eq_ref_candidate(TABLE_LIST * tl,table_map sj_inner_tables)6446 static bool find_eq_ref_candidate(TABLE_LIST *tl, table_map sj_inner_tables)
6447 {
6448 Key_use *keyuse= tl->table->reginfo.join_tab->keyuse();
6449
6450 if (keyuse)
6451 {
6452 while (1) /* For each key */
6453 {
6454 const uint key= keyuse->key;
6455 KEY *const keyinfo= tl->table->key_info + key;
6456 key_part_map bound_parts= 0;
6457 if ((keyinfo->flags & (HA_NOSAME)) == HA_NOSAME)
6458 {
6459 do /* For all equalities on all key parts */
6460 {
6461 /* Check if this is "t.keypart = expr(outer_tables) */
6462 if (!(keyuse->used_tables & sj_inner_tables) &&
6463 !(keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL))
6464 {
6465 /*
6466 Consider only if the resulting condition does not pass a NULL
6467 value through. Especially needed for a UNIQUE index on NULLable
6468 columns where a duplicate row is possible with NULL values.
6469 */
6470 if (keyuse->null_rejecting || !keyuse->val->maybe_null ||
6471 !keyinfo->key_part[keyuse->keypart].field->maybe_null())
6472 bound_parts|= (key_part_map)1 << keyuse->keypart;
6473 }
6474 keyuse++;
6475 } while (keyuse->key == key && keyuse->table_ref == tl);
6476
6477 if (bound_parts == LOWER_BITS(uint, keyinfo->user_defined_key_parts))
6478 return true;
6479 if (keyuse->table_ref != tl)
6480 return false;
6481 }
6482 else
6483 {
6484 do
6485 {
6486 keyuse++;
6487 if (keyuse->table_ref != tl)
6488 return false;
6489 }
6490 while (keyuse->key == key);
6491 }
6492 }
6493 }
6494 return false;
6495 }
6496
6497
6498 /**
6499 Pull tables out of semi-join nests based on functional dependencies
6500
6501 @param join The join where to do the semi-join table pullout
6502
6503 @return False if successful, true if error (Out of memory)
6504
6505 @details
6506 Pull tables out of semi-join nests based on functional dependencies,
6507 ie. if a table is accessed via eq_ref(outer_tables).
6508 The function may be called several times, the caller is responsible
6509 for setting up proper key information that this function acts upon.
6510
6511 PRECONDITIONS
6512 When this function is called, the join may have several semi-join nests
6513 but it is guaranteed that one semi-join nest does not contain another.
6514 For functionally dependent tables to be pulled out, key information must
6515 have been calculated (see update_ref_and_keys()).
6516
6517 POSTCONDITIONS
6518 * Tables that were pulled out are removed from the semi-join nest they
6519 belonged to and added to the parent join nest.
6520 * For these tables, the used_tables and not_null_tables fields of
6521 the semi-join nest they belonged to will be adjusted.
6522 The semi-join nest is also marked as correlated, and
6523 sj_corr_tables and sj_depends_on are adjusted if necessary.
6524 * Semi-join nests' sj_inner_tables is set equal to used_tables
6525
6526 NOTE
6527 Table pullout may make uncorrelated subquery correlated. Consider this
6528 example:
6529
6530 ... WHERE oe IN (SELECT it1.primary_key WHERE p(it1, it2) ... )
6531
6532 here table it1 can be pulled out (we have it1.primary_key=oe which gives
6533 us functional dependency). Once it1 is pulled out, all references to it1
6534 from p(it1, it2) become references to outside of the subquery and thus
6535 make the subquery (i.e. its semi-join nest) correlated.
6536 Making the subquery (i.e. its semi-join nest) correlated prevents us from
6537 using Materialization or LooseScan to execute it.
6538 */
6539
pull_out_semijoin_tables(JOIN * join)6540 static bool pull_out_semijoin_tables(JOIN *join)
6541 {
6542 TABLE_LIST *sj_nest;
6543 DBUG_ENTER("pull_out_semijoin_tables");
6544
6545 assert(!join->select_lex->sj_nests.is_empty());
6546
6547 List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests);
6548 Opt_trace_context * const trace= &join->thd->opt_trace;
6549 Opt_trace_object trace_wrapper(trace);
6550 Opt_trace_array trace_pullout(trace, "pulled_out_semijoin_tables");
6551
6552 /* Try pulling out tables from each semi-join nest */
6553 while ((sj_nest= sj_list_it++))
6554 {
6555 table_map pulled_tables= 0;
6556 List_iterator<TABLE_LIST> child_li(sj_nest->nested_join->join_list);
6557 TABLE_LIST *tbl;
6558 /*
6559 Calculate set of tables within this semi-join nest that have
6560 other dependent tables
6561 */
6562 table_map dep_tables= 0;
6563 while ((tbl= child_li++))
6564 {
6565 TABLE *const table= tbl->table;
6566 if (table &&
6567 (table->reginfo.join_tab->dependent &
6568 sj_nest->nested_join->used_tables))
6569 dep_tables|= table->reginfo.join_tab->dependent;
6570 }
6571 /*
6572 Find which tables we can pull out based on key dependency data.
6573 Note that pulling one table out can allow us to pull out some
6574 other tables too.
6575 */
6576 bool pulled_a_table;
6577 do
6578 {
6579 pulled_a_table= FALSE;
6580 child_li.rewind();
6581 while ((tbl= child_li++))
6582 {
6583 if (tbl->table &&
6584 !(pulled_tables & tbl->map()) &&
6585 !(dep_tables & tbl->map()))
6586 {
6587 if (find_eq_ref_candidate(tbl,
6588 sj_nest->nested_join->used_tables &
6589 ~pulled_tables))
6590 {
6591 pulled_a_table= TRUE;
6592 pulled_tables |= tbl->map();
6593 Opt_trace_object(trace).add_utf8_table(tbl).
6594 add("functionally_dependent", true);
6595 /*
6596 Pulling a table out of uncorrelated subquery in general makes
6597 it correlated. See the NOTE to this function.
6598 */
6599 sj_nest->nested_join->sj_corr_tables|= tbl->map();
6600 sj_nest->nested_join->sj_depends_on|= tbl->map();
6601 }
6602 }
6603 }
6604 } while (pulled_a_table);
6605
6606 child_li.rewind();
6607 /*
6608 Move the pulled out TABLE_LIST elements to the parents.
6609 */
6610 sj_nest->nested_join->used_tables&= ~pulled_tables;
6611 sj_nest->nested_join->not_null_tables&= ~pulled_tables;
6612
6613 /* sj_inner_tables is a copy of nested_join->used_tables */
6614 sj_nest->sj_inner_tables= sj_nest->nested_join->used_tables;
6615
6616 if (pulled_tables)
6617 {
6618 List<TABLE_LIST> *upper_join_list= (sj_nest->embedding != NULL) ?
6619 &sj_nest->embedding->nested_join->join_list :
6620 &join->select_lex->top_join_list;
6621
6622 Prepared_stmt_arena_holder ps_arena_holder(join->thd);
6623
6624 while ((tbl= child_li++))
6625 {
6626 if (tbl->table &&
6627 !(sj_nest->nested_join->used_tables & tbl->map()))
6628 {
6629 /*
6630 Pull the table up in the same way as simplify_joins() does:
6631 update join_list and embedding pointers but keep next[_local]
6632 pointers.
6633 */
6634 child_li.remove();
6635
6636 if (upper_join_list->push_back(tbl))
6637 DBUG_RETURN(TRUE);
6638
6639 tbl->join_list= upper_join_list;
6640 tbl->embedding= sj_nest->embedding;
6641 }
6642 }
6643
6644 /* Remove the sj-nest itself if we've removed everything from it */
6645 if (!sj_nest->nested_join->used_tables)
6646 {
6647 List_iterator<TABLE_LIST> li(*upper_join_list);
6648 /* Find the sj_nest in the list. */
6649 while (sj_nest != li++)
6650 {}
6651 li.remove();
6652 /* Also remove it from the list of SJ-nests: */
6653 sj_list_it.remove();
6654 }
6655 }
6656 }
6657 DBUG_RETURN(FALSE);
6658 }
6659
6660
6661 /**
6662 @defgroup RefOptimizerModule Ref Optimizer
6663
6664 @{
6665
6666 This module analyzes all equality predicates to determine the best
6667 independent ref/eq_ref/ref_or_null index access methods.
6668
6669 The 'ref' optimizer determines the columns (and expressions over them) that
6670 reference columns in other tables via an equality, and analyzes which keys
6671 and key parts can be used for index lookup based on these references. The
6672 main outcomes of the 'ref' optimizer are:
6673
6674 - A bi-directional graph of all equi-join conditions represented as an
6675 array of Key_use elements. This array is stored in JOIN::keyuse_array in
6676 table, key, keypart order. Each JOIN_TAB::keyuse points to the
6677 first Key_use element with the same table as JOIN_TAB::table.
6678
6679 - The table dependencies needed by the optimizer to determine what
6680 tables must be before certain table so that they provide the
6681 necessary column bindings for the equality predicates.
6682
6683 - Computed properties of the equality predicates such as null_rejecting
6684 and the result size of each separate condition.
6685
6686 Updates in JOIN_TAB:
6687 - JOIN_TAB::keys Bitmap of all used keys.
6688 - JOIN_TAB::const_keys Bitmap of all keys that may be used with quick_select.
6689 - JOIN_TAB::keyuse Pointer to possible keys.
6690 */
6691
6692 /**
6693 A Key_field is a descriptor of a predicate of the form (column <op> val).
6694 Currently 'op' is one of {'=', '<=>', 'IS [NOT] NULL', 'arg1 IN arg2'},
6695 and 'val' can be either another column or an expression (including constants).
6696
6697 Key_field's are used to analyze columns that may potentially serve as
6698 parts of keys for index lookup. If 'field' is part of an index, then
6699 add_key_part() creates a corresponding Key_use object and inserts it
6700 into the JOIN::keyuse_array which is passed by update_ref_and_keys().
6701
6702 The structure is used only during analysis of the candidate columns for
6703 index 'ref' access.
6704 */
6705 struct Key_field {
Key_fieldKey_field6706 Key_field(Item_field *item_field, Item *val, uint level,
6707 uint optimize, bool eq_func,
6708 bool null_rejecting, bool *cond_guard, uint sj_pred_no)
6709 : item_field(item_field), val(val), level(level),
6710 optimize(optimize), eq_func(eq_func),
6711 null_rejecting(null_rejecting), cond_guard(cond_guard),
6712 sj_pred_no(sj_pred_no)
6713 {}
6714 Item_field *item_field; ///< Item representing the column
6715 Item *val; ///< May be empty if diff constant
6716 uint level;
6717 uint optimize; ///< KEY_OPTIMIZE_*
6718 bool eq_func;
6719 /**
6720 If true, the condition this struct represents will not be satisfied
6721 when val IS NULL.
6722 @sa Key_use::null_rejecting .
6723 */
6724 bool null_rejecting;
6725 bool *cond_guard; ///< @sa Key_use::cond_guard
6726 uint sj_pred_no; ///< @sa Key_use::sj_pred_no
6727 };
6728
/*
  Values in optimize (Key_field::optimize). They are combined and tested
  as bit flags.
*/
/*
  Set by add_key_field() for an IS NULL test on a column that cannot be NULL
  in a nullable (outer joined) table; kept to be able to do the "not exists"
  optimization later.
*/
#define KEY_OPTIMIZE_EXISTS 1
/*
  Set by merge_key_fields() when "field = expression OR field IS NULL" has
  been merged into a single candidate.
*/
#define KEY_OPTIMIZE_REF_OR_NULL 2
6732
6733 /**
6734 Merge new key definitions to old ones, remove those not used in both.
6735
6736 This is called for OR between different levels.
6737
6738 To be able to do 'ref_or_null' we merge a comparison of a column
6739 and 'column IS NULL' to one test. This is useful for sub select queries
6740 that are internally transformed to something like:.
6741
6742 @code
6743 SELECT * FROM t1 WHERE t1.key=outer_ref_field or t1.key IS NULL
6744 @endcode
6745
6746 Key_field::null_rejecting is processed as follows: @n
6747 result has null_rejecting=true if it is set for both ORed references.
6748 for example:
6749 - (t2.key = t1.field OR t2.key = t1.field) -> null_rejecting=true
6750 - (t2.key = t1.field OR t2.key <=> t1.field) -> null_rejecting=false
6751
6752 @todo
6753 The result of this is that we're missing some 'ref' accesses.
6754 OptimizerTeam: Fix this
6755 */
6756
6757 static Key_field *
merge_key_fields(Key_field * start,Key_field * new_fields,Key_field * end,uint and_level)6758 merge_key_fields(Key_field *start, Key_field *new_fields, Key_field *end,
6759 uint and_level)
6760 {
6761 if (start == new_fields)
6762 return start; // Impossible or
6763 if (new_fields == end)
6764 return start; // No new fields, skip all
6765
6766 Key_field *first_free=new_fields;
6767
6768 /* Mark all found fields in old array */
6769 for (; new_fields != end ; new_fields++)
6770 {
6771 Field *const new_field= new_fields->item_field->field;
6772
6773 for (Key_field *old=start ; old != first_free ; old++)
6774 {
6775 Field *const old_field= old->item_field->field;
6776
6777 /*
6778 Check that the Field objects are the same, as we may have several
6779 Item_field objects pointing to the same Field:
6780 */
6781 if (old_field == new_field)
6782 {
6783 /*
6784 NOTE: below const_item() call really works as "!used_tables()", i.e.
6785 it can return FALSE where it is feasible to make it return TRUE.
6786
6787 The cause is as follows: Some of the tables are already known to be
6788 const tables (the detection code is in JOIN::make_join_plan(),
6789 above the update_ref_and_keys() call), but we didn't propagate
6790 information about this: TABLE::const_table is not set to TRUE, and
6791 Item::update_used_tables() hasn't been called for each item.
6792 The result of this is that we're missing some 'ref' accesses.
6793 TODO: OptimizerTeam: Fix this
6794 */
6795 if (!new_fields->val->const_item())
6796 {
6797 /*
6798 If the value matches, we can use the key reference.
6799 If not, we keep it until we have examined all new values
6800 */
6801 if (old->val->eq(new_fields->val, old_field->binary()))
6802 {
6803 old->level= and_level;
6804 old->optimize= ((old->optimize & new_fields->optimize &
6805 KEY_OPTIMIZE_EXISTS) |
6806 ((old->optimize | new_fields->optimize) &
6807 KEY_OPTIMIZE_REF_OR_NULL));
6808 old->null_rejecting= (old->null_rejecting &&
6809 new_fields->null_rejecting);
6810 }
6811 }
6812 else if (old->eq_func && new_fields->eq_func &&
6813 old->val->eq_by_collation(new_fields->val,
6814 old_field->binary(),
6815 old_field->charset()))
6816 {
6817 old->level= and_level;
6818 old->optimize= ((old->optimize & new_fields->optimize &
6819 KEY_OPTIMIZE_EXISTS) |
6820 ((old->optimize | new_fields->optimize) &
6821 KEY_OPTIMIZE_REF_OR_NULL));
6822 old->null_rejecting= (old->null_rejecting &&
6823 new_fields->null_rejecting);
6824 }
6825 else if (old->eq_func && new_fields->eq_func &&
6826 ((old->val->const_item() && old->val->is_null()) ||
6827 new_fields->val->is_null()))
6828 {
6829 /* field = expression OR field IS NULL */
6830 old->level= and_level;
6831 old->optimize= KEY_OPTIMIZE_REF_OR_NULL;
6832 /*
6833 Remember the NOT NULL value unless the value does not depend
6834 on other tables.
6835 */
6836 if (!old->val->used_tables() && old->val->is_null())
6837 old->val= new_fields->val;
6838 /* The referred expression can be NULL: */
6839 old->null_rejecting= 0;
6840 }
6841 else
6842 {
6843 /*
6844 We are comparing two different const. In this case we can't
6845 use a key-lookup on this so it's better to remove the value
6846 and let the range optimizer handle it
6847 */
6848 if (old == --first_free) // If last item
6849 break;
6850 *old= *first_free; // Remove old value
6851 old--; // Retry this value
6852 }
6853 }
6854 }
6855 }
6856 /* Remove all not used items */
6857 for (Key_field *old=start ; old != first_free ;)
6858 {
6859 if (old->level != and_level)
6860 { // Not used in all levels
6861 if (old == --first_free)
6862 break;
6863 *old= *first_free; // Remove old value
6864 continue;
6865 }
6866 old++;
6867 }
6868 return first_free;
6869 }
6870
6871
6872 /**
6873 Given a field, return its index in semi-join's select list, or UINT_MAX
6874
6875 @param item_field Field to be looked up in select list
6876
6877 @retval =UINT_MAX Field is not from a semijoin-transformed subquery
6878 @retval <UINT_MAX Index in select list of subquery
6879
6880 @details
6881 Given a field, find its table; then see if the table is within a
6882 semi-join nest and if the field was in select list of the subquery
6883 (if subquery was part of a quantified comparison predicate), or
6884 the field was a result of subquery decorrelation.
6885 If it was, then return the field's index in the select list.
6886 The value is used by LooseScan strategy.
6887 */
6888
get_semi_join_select_list_index(Item_field * item_field)6889 static uint get_semi_join_select_list_index(Item_field *item_field)
6890 {
6891 TABLE_LIST *emb_sj_nest= item_field->table_ref->embedding;
6892 if (emb_sj_nest && emb_sj_nest->sj_cond())
6893 {
6894 List<Item> &items= emb_sj_nest->nested_join->sj_inner_exprs;
6895 List_iterator<Item> it(items);
6896 for (uint i= 0; i < items.elements; i++)
6897 {
6898 Item *sel_item= it++;
6899 if (sel_item->type() == Item::FIELD_ITEM &&
6900 ((Item_field*)sel_item)->field->eq(item_field->field))
6901 return i;
6902 }
6903 }
6904 return UINT_MAX;
6905 }
6906
6907 /**
6908 @brief
6909 If EXPLAIN EXTENDED or if the --safe-updates option is enabled, add a
6910 warning that an index cannot be used for ref access
6911
6912 @details
6913 If EXPLAIN EXTENDED or if the --safe-updates option is enabled, add a
6914 warning for each index that cannot be used for ref access due to either type
6915 conversion or different collations on the field used for comparison
6916
6917 Example type conversion (char compared to int):
6918
6919 CREATE TABLE t1 (url char(1) PRIMARY KEY);
6920 SELECT * FROM t1 WHERE url=1;
6921
6922 Example different collations (danish vs german2):
6923
6924 CREATE TABLE t1 (url char(1) PRIMARY KEY) collate latin1_danish_ci;
6925 SELECT * FROM t1 WHERE url='1' collate latin1_german2_ci;
6926
6927 @param thd Thread for the connection that submitted the query
6928 @param field Field used in comparision
6929 @param cant_use_index Indexes that cannot be used for lookup
6930 */
6931 static void
warn_index_not_applicable(THD * thd,const Field * field,const key_map cant_use_index)6932 warn_index_not_applicable(THD *thd, const Field *field,
6933 const key_map cant_use_index)
6934 {
6935 if (thd->lex->describe ||
6936 thd->variables.option_bits & OPTION_SAFE_UPDATES)
6937 for (uint j=0 ; j < field->table->s->keys ; j++)
6938 if (cant_use_index.is_set(j))
6939 push_warning_printf(thd,
6940 Sql_condition::SL_WARNING,
6941 ER_WARN_INDEX_NOT_APPLICABLE,
6942 ER(ER_WARN_INDEX_NOT_APPLICABLE),
6943 "ref",
6944 field->table->key_info[j].name,
6945 field->field_name);
6946 }
6947
6948 /**
6949 Add a possible key to array of possible keys if it's usable as a key
6950
6951 @param key_fields[in,out] Used as an input paramater in the sense that it is a
6952 pointer to a pointer to a memory area where an array of Key_field objects will
6953 stored. It is used as an out parameter in the sense that the pointer will be
6954 updated to point beyond the last Key_field written.
6955
6956 @param and_level And level, to be stored in Key_field
6957 @param cond Condition predicate
6958 @param field Field used in comparision
6959 @param eq_func True if we used =, <=> or IS NULL
6960 @param value Array of values used for comparison with field
6961 @param num_values Number of elements in the array of values
6962 @param usable_tables Tables which can be used for key optimization
6963 @param sargables IN/OUT Array of found sargable candidates. Will be
6964 ignored in case eq_func is true.
6965
6966 @note
6967 If we are doing a NOT NULL comparison on a NOT NULL field in a outer join
6968 table, we store this to be able to do not exists optimization later.
6969
6970 @return
6971 *key_fields is incremented if we stored a key in the array
6972 */
6973
6974 static void
add_key_field(Key_field ** key_fields,uint and_level,Item_func * cond,Item_field * item_field,bool eq_func,Item ** value,uint num_values,table_map usable_tables,SARGABLE_PARAM ** sargables)6975 add_key_field(Key_field **key_fields, uint and_level, Item_func *cond,
6976 Item_field *item_field, bool eq_func, Item **value,
6977 uint num_values, table_map usable_tables,
6978 SARGABLE_PARAM **sargables)
6979 {
6980 assert(eq_func || sargables);
6981
6982 Field *const field= item_field->field;
6983 TABLE_LIST *const tl= item_field->table_ref;
6984
6985 if (tl->table->reginfo.join_tab == NULL)
6986 {
6987 /*
6988 Due to a bug in IN-to-EXISTS (grep for real_item() in item_subselect.cc
6989 for more info), an index over a field from an outer query might be
6990 considered here, which is incorrect. Their query has been fully
6991 optimized already so their reginfo.join_tab is NULL and we reject them.
6992 */
6993 return;
6994 }
6995
6996 DBUG_PRINT("info", ("add_key_field for field %s", field->field_name));
6997 uint exists_optimize= 0;
6998 if (!tl->derived_keys_ready && tl->uses_materialization() &&
6999 !tl->table->is_created() &&
7000 tl->update_derived_keys(field, value, num_values))
7001 return;
7002 if (!(field->flags & PART_KEY_FLAG))
7003 {
7004 // Don't remove column IS NULL on a LEFT JOIN table
7005 if (!eq_func || (*value)->type() != Item::NULL_ITEM ||
7006 !tl->table->is_nullable() || field->real_maybe_null())
7007 return; // Not a key. Skip it
7008 exists_optimize= KEY_OPTIMIZE_EXISTS;
7009 assert(num_values == 1);
7010 }
7011 else
7012 {
7013 table_map used_tables= 0;
7014 bool optimizable= false;
7015 for (uint i=0; i<num_values; i++)
7016 {
7017 used_tables|=(value[i])->used_tables();
7018 if (!((value[i])->used_tables() & (tl->map() | RAND_TABLE_BIT)))
7019 optimizable= true;
7020 }
7021 if (!optimizable)
7022 return;
7023 if (!(usable_tables & tl->map()))
7024 {
7025 if (!eq_func || (*value)->type() != Item::NULL_ITEM ||
7026 !tl->table->is_nullable() || field->real_maybe_null())
7027 return; // Can't use left join optimize
7028 exists_optimize= KEY_OPTIMIZE_EXISTS;
7029 }
7030 else
7031 {
7032 JOIN_TAB *stat= tl->table->reginfo.join_tab;
7033 key_map possible_keys=field->key_start;
7034 possible_keys.intersect(tl->table->keys_in_use_for_query);
7035 stat[0].keys().merge(possible_keys); // Add possible keys
7036
7037 /*
7038 Save the following cases:
7039 Field op constant
7040 Field LIKE constant where constant doesn't start with a wildcard
7041 Field = field2 where field2 is in a different table
7042 Field op formula
7043 Field IS NULL
7044 Field IS NOT NULL
7045 Field BETWEEN ...
7046 Field IN ...
7047 */
7048 stat[0].key_dependent|=used_tables;
7049
7050 bool is_const= true;
7051 for (uint i=0; i<num_values; i++)
7052 {
7053 if (!(is_const&= value[i]->const_item()))
7054 break;
7055 }
7056 if (is_const)
7057 stat[0].const_keys.merge(possible_keys);
7058 else if (!eq_func)
7059 {
7060 /*
7061 Save info to be able check whether this predicate can be
7062 considered as sargable for range analysis after reading const tables.
7063 We do not save info about equalities as update_const_equal_items
7064 will take care of updating info on keys from sargable equalities.
7065 */
7066 assert(sargables);
7067 (*sargables)--;
7068 /*
7069 The sargables and key_fields arrays share the same memory
7070 buffer, and grow from opposite directions, so make sure they
7071 don't cross.
7072 */
7073 assert(*sargables > *reinterpret_cast<SARGABLE_PARAM**>(key_fields));
7074 (*sargables)->field= field;
7075 (*sargables)->arg_value= value;
7076 (*sargables)->num_values= num_values;
7077 }
7078 /*
7079 We can't always use indexes when comparing a string index to a
7080 number. cmp_type() is checked to allow compare of dates to numbers.
7081 eq_func is NEVER true when num_values > 1
7082 */
7083 if (!eq_func)
7084 return;
7085
7086 /*
7087 Check if the field and value are comparable in the index.
7088 @todo: This code is almost identical to comparable_in_index()
7089 in opt_range.cc. Consider replacing the checks below with a
7090 function call to comparable_in_index()
7091 */
7092 if (field->result_type() == STRING_RESULT)
7093 {
7094 if ((*value)->result_type() != STRING_RESULT)
7095 {
7096 if (field->cmp_type() != (*value)->result_type())
7097 {
7098 warn_index_not_applicable(stat->join()->thd, field, possible_keys);
7099 return;
7100 }
7101 }
7102 else
7103 {
7104 /*
7105 Can't optimize datetime_column=indexed_varchar_column,
7106 also can't use indexes if the effective collation
7107 of the operation differ from the field collation.
7108 IndexedTimeComparedToDate: can't optimize
7109 'indexed_time = temporal_expr_with_date_part' because:
7110 - without index, a TIME column with value '48:00:00' is equal to a
7111 DATETIME column with value 'CURDATE() + 2 days'
7112 - with ref access into the TIME column, CURDATE() + 2 days becomes
7113 "00:00:00" (Field_timef::store_internal() simply extracts the time
7114 part from the datetime) which is a lookup key which does not match
7115 "48:00:00"; so ref access is not be able to give the same result
7116 as without index, so is disabled.
7117 On the other hand, we can optimize indexed_datetime = time
7118 because Field_temporal_with_date::store_time() will convert
7119 48:00:00 to CURDATE() + 2 days which is the correct lookup key.
7120 */
7121 if ((!field->is_temporal() && value[0]->is_temporal()) ||
7122 (field->cmp_type() == STRING_RESULT &&
7123 field->charset() != cond->compare_collation()) ||
7124 field_time_cmp_date(field, value[0]))
7125 {
7126 warn_index_not_applicable(stat->join()->thd, field, possible_keys);
7127 return;
7128 }
7129 }
7130 }
7131
7132 /*
7133 We can't use indexes when comparing to a JSON value. For example,
7134 the string '{}' should compare equal to the JSON string "{}". If
7135 we use a string index to compare the two strings, we will be
7136 comparing '{}' and '"{}"', which don't compare equal.
7137 */
7138 if (value[0]->result_type() == STRING_RESULT &&
7139 value[0]->field_type() == MYSQL_TYPE_JSON)
7140 {
7141 warn_index_not_applicable(stat->join()->thd, field, possible_keys);
7142 return;
7143 }
7144 }
7145 }
7146 /*
7147 For the moment eq_func is always true. This slot is reserved for future
7148 extensions where we want to remembers other things than just eq comparisons
7149 */
7150 assert(eq_func);
7151 /*
7152 If the condition has form "tbl.keypart = othertbl.field" and
7153 othertbl.field can be NULL, there will be no matches if othertbl.field
7154 has NULL value.
7155 We use null_rejecting in add_not_null_conds() to add
7156 'othertbl.field IS NOT NULL' to tab->m_condition, if this is not an outer
7157 join. We also use it to shortcut reading "tbl" when othertbl.field is
7158 found to be a NULL value (in join_read_always_key() and BKA).
7159 */
7160 Item *const real= (*value)->real_item();
7161 const bool null_rejecting=
7162 ((cond->functype() == Item_func::EQ_FUNC) ||
7163 (cond->functype() == Item_func::MULT_EQUAL_FUNC)) &&
7164 (real->type() == Item::FIELD_ITEM) &&
7165 ((Item_field*)real)->field->maybe_null();
7166
7167 /* Store possible eq field */
7168 new (*key_fields)
7169 Key_field(item_field, *value, and_level, exists_optimize, eq_func,
7170 null_rejecting, NULL,
7171 get_semi_join_select_list_index(item_field));
7172 (*key_fields)++;
7173 /*
7174 The sargables and key_fields arrays share the same memory buffer,
7175 and grow from opposite directions, so make sure they don't
7176 cross. But if sargables was NULL, eq_func had to be true and we
7177 don't write any sargables.
7178 */
7179 assert(sargables == NULL ||
7180 *key_fields < *reinterpret_cast<Key_field**>(sargables));
7181 }
7182
7183 /**
7184 Add possible keys to array of possible keys originated from a simple
7185 predicate.
7186
7187 @param key_fields Pointer to add key, if usable
7188 @param and_level And level, to be stored in Key_field
7189 @param cond Condition predicate
7190 @param field_item Field used in comparision
7191 @param eq_func True if we used =, <=> or IS NULL
7192 @param val Value used for comparison with field
7193 Is NULL for BETWEEN and IN
7194 @param usable_tables Tables which can be used for key optimization
7195 @param sargables IN/OUT Array of found sargable candidates
7196
7197 @note
7198 If field items f1 and f2 belong to the same multiple equality and
7199 a key is added for f1, the the same key is added for f2.
7200
7201 @returns
7202 *key_fields is incremented if we stored a key in the array
7203 */
7204
7205 static void
add_key_equal_fields(Key_field ** key_fields,uint and_level,Item_func * cond,Item_field * field_item,bool eq_func,Item ** val,uint num_values,table_map usable_tables,SARGABLE_PARAM ** sargables)7206 add_key_equal_fields(Key_field **key_fields, uint and_level,
7207 Item_func *cond, Item_field *field_item,
7208 bool eq_func, Item **val,
7209 uint num_values, table_map usable_tables,
7210 SARGABLE_PARAM **sargables)
7211 {
7212 DBUG_ENTER("add_key_equal_fields");
7213
7214 add_key_field(key_fields, and_level, cond, field_item,
7215 eq_func, val, num_values, usable_tables, sargables);
7216 Item_equal *item_equal= field_item->item_equal;
7217 if (item_equal)
7218 {
7219 /*
7220 Add to the set of possible key values every substitution of
7221 the field for an equal field included into item_equal
7222 */
7223 Item_equal_iterator it(*item_equal);
7224 Item_field *item;
7225 while ((item= it++))
7226 {
7227 if (!field_item->field->eq(item->field))
7228 add_key_field(key_fields, and_level, cond, item,
7229 eq_func, val, num_values, usable_tables,
7230 sargables);
7231 }
7232 }
7233 DBUG_VOID_RETURN;
7234 }
7235
7236
7237 /**
7238 Check if an expression is a non-outer field.
7239
7240 Checks if an expression is a field and belongs to the current select.
7241
7242 @param field Item expression to check
7243
7244 @return boolean
7245 @retval TRUE the expression is a local field
7246 @retval FALSE it's something else
7247 */
7248
7249 static bool
is_local_field(Item * field)7250 is_local_field (Item *field)
7251 {
7252 return field->real_item()->type() == Item::FIELD_ITEM &&
7253 !(field->used_tables() & OUTER_REF_TABLE_BIT) &&
7254 !down_cast<Item_ident *>(field)->depended_from &&
7255 !down_cast<Item_ident *>(field->real_item())->depended_from;
7256 }
7257
7258
7259 /**
7260 Check if a row constructor expression is over columns in the same query block.
7261
7262 @param item_row Row expression to check.
7263
7264 @return boolean
7265 @retval true The expression is a local column reference.
7266 @retval false It's something else.
7267 */
is_row_of_local_columns(Item_row * item_row)7268 static bool is_row_of_local_columns(Item_row *item_row)
7269 {
7270 for (uint i= 0; i < item_row->cols(); ++i)
7271 if (!is_local_field(item_row->element_index(i)))
7272 return false;
7273 return true;
7274 }
7275
7276
7277 /**
7278 The guts of the ref optimizer. This function, along with the other
7279 add_key_* functions, make up a recursive procedure that analyzes a
7280 condition expression (a tree of AND and OR predicates) and does
7281 many things.
7282
7283 @param join The query block involving the condition.
7284
7285 @param key_fields[in,out] Start of memory buffer, see below.
7286 @param and_level[in, out] Current 'and level', see below.
7287 @param cond The conditional expression to analyze.
7288 @param usable_tables Tables not in this bitmap will not be examined.
7289 @param sargables [in,out] End of memory buffer, see below.
7290
7291 This documentation is the result of reverse engineering and may
7292 therefore not capture the full gist of the procedure, but it is
7293 known to do the following:
7294
7295 - Populate a raw memory buffer from two directions at the same time. An
7296 'array' of Key_field objects fill the buffer from low to high addresses
7297 whilst an 'array' of SARGABLE_PARAM's fills the buffer from high to low
7298 addresses. At the first call to this function, it is assumed that
7299 key_fields points to the beginning of the buffer and sargables point to the
7300 end (except for a poor-mans 'null element' at the very end).
7301
7302 - Update a number of properties in the JOIN_TAB's that can be used
7303 to find search keys (sargables).
7304
7305 - JOIN_TAB::keys
7306 - JOIN_TAB::key_dependent
7307 - JOIN_TAB::const_keys (dictates if the range optimizer will be run
7308 later.)
7309
7310 The Key_field objects are marked with something called an 'and_level', which
7311 does @b not correspond to their nesting depth within the expression tree. It
7312 is rather a tag to group conjunctions together. For instance, in the
7313 conditional expression
7314
7315 @code
7316 a = 0 AND b = 0
7317 @endcode
7318
7319 two Key_field's are produced, both having an and_level of 0.
7320
7321 In an expression such as
7322
7323 @code
7324 a = 0 AND b = 0 OR a = 1
7325 @endcode
7326
7327 three Key_field's are produced, the first two corresponding to 'a = 0' and
7328 'b = 0', respectively, both with and_level 0. The third one corresponds to
7329 'a = 1' and has an and_level of 1.
7330
  A separate function, merge_key_fields(), performs ref access validation on
  the Key_field array on the recursive ascent. If some Key_field's cannot be
  used for ref access, the key_fields pointer is rolled back. All other
  modifications to the query plan remain.
7335 */
7336 static void
add_key_fields(JOIN * join,Key_field ** key_fields,uint * and_level,Item * cond,table_map usable_tables,SARGABLE_PARAM ** sargables)7337 add_key_fields(JOIN *join, Key_field **key_fields, uint *and_level,
7338 Item *cond, table_map usable_tables,
7339 SARGABLE_PARAM **sargables)
7340 {
7341 DBUG_ENTER("add_key_fields");
7342 if (cond->type() == Item_func::COND_ITEM)
7343 {
7344 List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
7345 Key_field *org_key_fields= *key_fields;
7346
7347 if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
7348 {
7349 Item *item;
7350 while ((item=li++))
7351 add_key_fields(join, key_fields, and_level, item, usable_tables,
7352 sargables);
7353 for (; org_key_fields != *key_fields ; org_key_fields++)
7354 org_key_fields->level= *and_level;
7355 }
7356 else
7357 {
7358 (*and_level)++;
7359 add_key_fields(join, key_fields, and_level, li++, usable_tables,
7360 sargables);
7361 Item *item;
7362 while ((item=li++))
7363 {
7364 Key_field *start_key_fields= *key_fields;
7365 (*and_level)++;
7366 add_key_fields(join, key_fields, and_level, item, usable_tables,
7367 sargables);
7368 *key_fields=merge_key_fields(org_key_fields,start_key_fields,
7369 *key_fields,++(*and_level));
7370 }
7371 }
7372 DBUG_VOID_RETURN;
7373 }
7374
7375 /*
7376 Subquery optimization: Conditions that are pushed down into subqueries
7377 are wrapped into Item_func_trig_cond. We process the wrapped condition
7378 but need to set cond_guard for Key_use elements generated from it.
7379 */
7380 {
7381 if (cond->type() == Item::FUNC_ITEM &&
7382 ((Item_func*)cond)->functype() == Item_func::TRIG_COND_FUNC)
7383 {
7384 Item *cond_arg= ((Item_func*)cond)->arguments()[0];
7385 if (!join->group_list && !join->order &&
7386 join->unit->item &&
7387 join->unit->item->substype() == Item_subselect::IN_SUBS &&
7388 !join->unit->is_union())
7389 {
7390 Key_field *save= *key_fields;
7391 add_key_fields(join, key_fields, and_level, cond_arg, usable_tables,
7392 sargables);
7393 // Indicate that this ref access candidate is for subquery lookup:
7394 for (; save != *key_fields; save++)
7395 save->cond_guard= ((Item_func_trig_cond*)cond)->get_trig_var();
7396 }
7397 DBUG_VOID_RETURN;
7398 }
7399 }
7400
7401 /* If item is of type 'field op field/constant' add it to key_fields */
7402 if (cond->type() != Item::FUNC_ITEM)
7403 DBUG_VOID_RETURN;
7404 Item_func *cond_func= (Item_func*) cond;
7405 switch (cond_func->select_optimize()) {
7406 case Item_func::OPTIMIZE_NONE:
7407 break;
7408 case Item_func::OPTIMIZE_KEY:
7409 {
7410 Item **values;
7411 /*
7412 Build list of possible keys for 'a BETWEEN low AND high'.
7413 It is handled similar to the equivalent condition
7414 'a >= low AND a <= high':
7415 */
7416 if (cond_func->functype() == Item_func::BETWEEN)
7417 {
7418 Item_field *field_item;
7419 bool equal_func= FALSE;
7420 uint num_values= 2;
7421 values= cond_func->arguments();
7422
7423 bool binary_cmp= (values[0]->real_item()->type() == Item::FIELD_ITEM)
7424 ? ((Item_field*)values[0]->real_item())->field->binary()
7425 : TRUE;
7426
7427 /*
7428 Additional optimization: If 'low = high':
7429 Handle as if the condition was "t.key = low".
7430 */
7431 if (!((Item_func_between*)cond_func)->negated &&
7432 values[1]->eq(values[2], binary_cmp))
7433 {
7434 equal_func= TRUE;
7435 num_values= 1;
7436 }
7437
7438 /*
7439 Append keys for 'field <cmp> value[]' if the
7440 condition is of the form::
7441 '<field> BETWEEN value[1] AND value[2]'
7442 */
7443 if (is_local_field (values[0]))
7444 {
7445 field_item= (Item_field *) (values[0]->real_item());
7446 add_key_equal_fields(key_fields, *and_level, cond_func,
7447 field_item, equal_func, &values[1],
7448 num_values, usable_tables, sargables);
7449 }
7450 /*
7451 Append keys for 'value[0] <cmp> field' if the
7452 condition is of the form:
7453 'value[0] BETWEEN field1 AND field2'
7454 */
7455 for (uint i= 1; i <= num_values; i++)
7456 {
7457 if (is_local_field (values[i]))
7458 {
7459 field_item= (Item_field *) (values[i]->real_item());
7460 add_key_equal_fields(key_fields, *and_level, cond_func,
7461 field_item, equal_func, values,
7462 1, usable_tables, sargables);
7463 }
7464 }
7465 } // if ( ... Item_func::BETWEEN)
7466
7467 // The predicate is IN or !=
7468 else if (is_local_field (cond_func->key_item()) &&
7469 !(cond_func->used_tables() & OUTER_REF_TABLE_BIT))
7470 {
7471 values= cond_func->arguments()+1;
7472 if (cond_func->functype() == Item_func::NE_FUNC &&
7473 is_local_field (cond_func->arguments()[1]))
7474 values--;
7475 assert(cond_func->functype() != Item_func::IN_FUNC ||
7476 cond_func->argument_count() != 2);
7477 add_key_equal_fields(key_fields, *and_level, cond_func,
7478 (Item_field*) (cond_func->key_item()->real_item()),
7479 0, values,
7480 cond_func->argument_count()-1,
7481 usable_tables, sargables);
7482 }
7483 else if (cond_func->functype() == Item_func::IN_FUNC &&
7484 cond_func->key_item()->type() == Item::ROW_ITEM)
7485 {
7486 /*
7487 The condition is (column1, column2, ... ) IN ((const1_1, const1_2), ...)
7488 and there is an index on (column1, column2, ...)
7489
7490 The code below makes sure that the row constructor on the lhs indeed
7491 contains only column references before calling add_key_field on them.
7492
7493 We can't do a ref access on IN, yet here we are. Why? We need
7494 to run add_key_field() only because it verifies that there are
7495 only constant expressions in the rows on the IN's rhs, see
7496 comment above the call to add_key_field() below.
7497
7498 Actually, We could in theory do a ref access if the IN rhs
7499 contained just a single row, but there is a hack in the parser
7500 causing such IN predicates be parsed as row equalities.
7501 */
7502 Item_row *lhs_row= static_cast<Item_row*>(cond_func->key_item());
7503 if (is_row_of_local_columns(lhs_row))
7504 {
7505 for (uint i= 0; i < lhs_row->cols(); ++i)
7506 {
7507 Item *const lhs_item= lhs_row->element_index(i)->real_item();
7508 assert(lhs_item->type() == Item::FIELD_ITEM);
7509 Item_field *const lhs_column= static_cast<Item_field*>(lhs_item);
7510 // j goes from 1 since arguments()[0] is the lhs of IN.
7511 for (uint j= 1; j < cond_func->argument_count(); ++j)
7512 {
7513 // Here we pick out the i:th column in the j:th row.
7514 Item *rhs_item= cond_func->arguments()[j];
7515 assert(rhs_item->type() == Item::ROW_ITEM);
7516 Item_row *rhs_row= static_cast<Item_row*>(rhs_item);
7517 assert(rhs_row->cols() == lhs_row->cols());
7518 Item **rhs_expr_ptr= rhs_row->addr(i);
7519 /*
7520 add_key_field() will write a Key_field on each call
7521 here, but we don't care, it will never be used. We only
7522 call it for the side effect: update JOIN_TAB::const_keys
7523 so the range optimizer can be invoked. We pass a
7524 scrap buffer and pointer here.
7525 */
7526 Key_field scrap_key_field= **key_fields;
7527 Key_field *scrap_key_field_ptr= &scrap_key_field;
7528 add_key_field(&scrap_key_field_ptr,
7529 *and_level,
7530 cond_func,
7531 lhs_column,
7532 true, // eq_func
7533 rhs_expr_ptr,
7534 1, // Number of expressions: one
7535 usable_tables,
7536 NULL); // sargables
7537 // The pointer is not supposed to increase by more than one.
7538 assert(scrap_key_field_ptr <= &scrap_key_field + 1);
7539 }
7540 }
7541 }
7542 }
7543 break;
7544 }
7545 case Item_func::OPTIMIZE_OP:
7546 {
7547 bool equal_func=(cond_func->functype() == Item_func::EQ_FUNC ||
7548 cond_func->functype() == Item_func::EQUAL_FUNC);
7549
7550 if (is_local_field (cond_func->arguments()[0]))
7551 {
7552 add_key_equal_fields(key_fields, *and_level, cond_func,
7553 (Item_field*) (cond_func->arguments()[0])->real_item(),
7554 equal_func,
7555 cond_func->arguments()+1, 1, usable_tables,
7556 sargables);
7557 }
7558 if (is_local_field (cond_func->arguments()[1]) &&
7559 cond_func->functype() != Item_func::LIKE_FUNC)
7560 {
7561 add_key_equal_fields(key_fields, *and_level, cond_func,
7562 (Item_field*) (cond_func->arguments()[1])->real_item(),
7563 equal_func,
7564 cond_func->arguments(),1,usable_tables,
7565 sargables);
7566 }
7567 break;
7568 }
7569 case Item_func::OPTIMIZE_NULL:
7570 /* column_name IS [NOT] NULL */
7571 if (is_local_field (cond_func->arguments()[0]) &&
7572 !(cond_func->used_tables() & OUTER_REF_TABLE_BIT))
7573 {
7574 Item *tmp=new Item_null;
7575 if (unlikely(!tmp)) // Should never be true
7576 DBUG_VOID_RETURN;
7577 add_key_equal_fields(key_fields, *and_level, cond_func,
7578 (Item_field*) (cond_func->arguments()[0])->real_item(),
7579 cond_func->functype() == Item_func::ISNULL_FUNC,
7580 &tmp, 1, usable_tables, sargables);
7581 }
7582 break;
7583 case Item_func::OPTIMIZE_EQUAL:
7584 Item_equal *item_equal= (Item_equal *) cond;
7585 Item *const_item= item_equal->get_const();
7586 if (const_item)
7587 {
7588 /*
7589 For each field field1 from item_equal consider the equality
7590 field1=const_item as a condition allowing an index access of the table
7591 with field1 by the keys value of field1.
7592 */
7593 Item_equal_iterator it(*item_equal);
7594 Item_field *item;
7595 while ((item= it++))
7596 {
7597 add_key_field(key_fields, *and_level, cond_func, item,
7598 TRUE, &const_item, 1, usable_tables, sargables);
7599 }
7600 }
7601 else
7602 {
7603 /*
7604 Consider all pairs of different fields included into item_equal.
7605 For each of them (field1, field1) consider the equality
7606 field1=field2 as a condition allowing an index access of the table
7607 with field1 by the keys value of field2.
7608 */
7609 Item_equal_iterator outer_it(*item_equal);
7610 Item_equal_iterator inner_it(*item_equal);
7611 Item_field *outer;
7612 while ((outer= outer_it++))
7613 {
7614 Item_field *inner;
7615 while ((inner= inner_it++))
7616 {
7617 if (!outer->field->eq(inner->field))
7618 add_key_field(key_fields, *and_level, cond_func, outer,
7619 true, (Item **) &inner, 1, usable_tables,
7620 sargables);
7621 }
7622 inner_it.rewind();
7623 }
7624 }
7625 break;
7626 }
7627 DBUG_VOID_RETURN;
7628 }
7629
7630
7631 /*
7632 Add all keys with uses 'field' for some keypart
7633 If field->and_level != and_level then only mark key_part as const_part
7634
7635 RETURN
7636 0 - OK
7637 1 - Out of memory.
7638 */
7639
7640 static bool
add_key_part(Key_use_array * keyuse_array,Key_field * key_field)7641 add_key_part(Key_use_array *keyuse_array, Key_field *key_field)
7642 {
7643 if (key_field->eq_func && !(key_field->optimize & KEY_OPTIMIZE_EXISTS))
7644 {
7645 Field *const field= key_field->item_field->field;
7646 TABLE_LIST *const tl= key_field->item_field->table_ref;
7647 TABLE *const table= tl->table;
7648
7649 for (uint key=0 ; key < table->s->keys ; key++)
7650 {
7651 if (!(table->keys_in_use_for_query.is_set(key)))
7652 continue;
7653 if (table->key_info[key].flags & (HA_FULLTEXT | HA_SPATIAL))
7654 continue; // ToDo: ft-keys in non-ft queries. SerG
7655
7656 uint key_parts= actual_key_parts(&table->key_info[key]);
7657 for (uint part=0 ; part < key_parts ; part++)
7658 {
7659 if (field->eq(table->key_info[key].key_part[part].field))
7660 {
7661 const Key_use keyuse(tl,
7662 key_field->val,
7663 key_field->val->used_tables(),
7664 key,
7665 part,
7666 key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL,
7667 (key_part_map) 1 << part,
7668 ~(ha_rows) 0, // will be set in optimize_keyuse
7669 key_field->null_rejecting,
7670 key_field->cond_guard,
7671 key_field->sj_pred_no);
7672 if (keyuse_array->push_back(keyuse))
7673 return true; /* purecov: inspected */
7674 }
7675 }
7676 }
7677 }
7678 return false;
7679 }
7680
7681
7682 /**
7683 Function parses WHERE condition and add key_use for FT index
7684 into key_use array if suitable MATCH function is found.
7685 Condition should be a set of AND expression, OR is not supported.
7686 MATCH function should be a part of simple expression.
7687 Simple expression is MATCH only function or MATCH is a part of
7688 comparison expression ('>=' or '>' operations are supported).
7689 It also sets FT_HINTS values(op_type, op_value).
7690
7691 @param keyuse_array Key_use array
7692 @param stat JOIN_TAB structure
7693 @param cond WHERE condition
7694 @param usable_tables usable tables
7695 @param simple_match_expr true if this is the first call false otherwise.
7696 if MATCH function is found at first call it means
7697 that MATCH is simple expression, otherwise, in case
7698 of AND/OR condition this parameter will be false.
7699
7700 @retval
7701 true if FT key was added to Key_use array
7702 @retval
7703 false if no key was added to Key_use array
7704
7705 */
7706
7707 static bool
add_ft_keys(Key_use_array * keyuse_array,JOIN_TAB * stat,Item * cond,table_map usable_tables,bool simple_match_expr)7708 add_ft_keys(Key_use_array *keyuse_array,
7709 JOIN_TAB *stat,Item *cond,table_map usable_tables,
7710 bool simple_match_expr)
7711 {
7712 Item_func_match *cond_func=NULL;
7713
7714 if (!cond)
7715 return FALSE;
7716
7717 if (cond->type() == Item::FUNC_ITEM)
7718 {
7719 Item_func *func=(Item_func *)cond;
7720 Item_func::Functype functype= func->functype();
7721 enum ft_operation op_type= FT_OP_NO;
7722 double op_value= 0.0;
7723 if (functype == Item_func::FT_FUNC)
7724 {
7725 cond_func= ((Item_func_match *) cond)->get_master();
7726 cond_func->set_hints_op(op_type, op_value);
7727 }
7728 else if (func->arg_count == 2)
7729 {
7730 Item *arg0=(func->arguments()[0]),
7731 *arg1=(func->arguments()[1]);
7732 if (arg1->const_item() &&
7733 arg0->type() == Item::FUNC_ITEM &&
7734 ((Item_func *) arg0)->functype() == Item_func::FT_FUNC &&
7735 ((functype == Item_func::GE_FUNC &&
7736 (op_value= arg1->val_real()) > 0) ||
7737 (functype == Item_func::GT_FUNC &&
7738 (op_value= arg1->val_real()) >=0)))
7739 {
7740 cond_func= ((Item_func_match *) arg0)->get_master();
7741 if (functype == Item_func::GE_FUNC)
7742 op_type= FT_OP_GE;
7743 else if (functype == Item_func::GT_FUNC)
7744 op_type= FT_OP_GT;
7745 cond_func->set_hints_op(op_type, op_value);
7746 }
7747 else if (arg0->const_item() &&
7748 arg1->type() == Item::FUNC_ITEM &&
7749 ((Item_func *) arg1)->functype() == Item_func::FT_FUNC &&
7750 ((functype == Item_func::LE_FUNC &&
7751 (op_value= arg0->val_real()) > 0) ||
7752 (functype == Item_func::LT_FUNC &&
7753 (op_value= arg0->val_real()) >=0)))
7754 {
7755 cond_func= ((Item_func_match *) arg1)->get_master();
7756 if (functype == Item_func::LE_FUNC)
7757 op_type= FT_OP_GE;
7758 else if (functype == Item_func::LT_FUNC)
7759 op_type= FT_OP_GT;
7760 cond_func->set_hints_op(op_type, op_value);
7761 }
7762 }
7763 }
7764 else if (cond->type() == Item::COND_ITEM)
7765 {
7766 List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
7767
7768 if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
7769 {
7770 Item *item;
7771 while ((item=li++))
7772 {
7773 if (add_ft_keys(keyuse_array, stat, item, usable_tables, false))
7774 return TRUE;
7775 }
7776 }
7777 }
7778
7779 if (!cond_func || cond_func->key == NO_SUCH_KEY ||
7780 !(usable_tables & cond_func->table_ref->map()))
7781 return FALSE;
7782
7783 cond_func->set_simple_expression(simple_match_expr);
7784
7785 const Key_use keyuse(cond_func->table_ref,
7786 cond_func,
7787 cond_func->key_item()->used_tables(),
7788 cond_func->key,
7789 FT_KEYPART,
7790 0, // optimize
7791 0, // keypart_map
7792 ~(ha_rows)0, // ref_table_rows
7793 false, // null_rejecting
7794 NULL, // cond_guard
7795 UINT_MAX); // sj_pred_no
7796 return keyuse_array->push_back(keyuse);
7797 }
7798
7799
7800 /**
7801 Compares two keyuse elements.
7802
7803 @param a first Key_use element
7804 @param b second Key_use element
7805
7806 Compare Key_use elements so that they are sorted as follows:
7807 -# By table.
7808 -# By key for each table.
7809 -# By keypart for each key.
7810 -# Const values.
7811 -# Ref_or_null.
7812
7813 @retval 0 If a = b.
7814 @retval <0 If a < b.
7815 @retval >0 If a > b.
7816 */
sort_keyuse(Key_use * a,Key_use * b)7817 static int sort_keyuse(Key_use *a, Key_use *b)
7818 {
7819 int res;
7820 if (a->table_ref->tableno() != b->table_ref->tableno())
7821 return (int) (a->table_ref->tableno() - b->table_ref->tableno());
7822 if (a->key != b->key)
7823 return (int) (a->key - b->key);
7824 if (a->keypart != b->keypart)
7825 return (int) (a->keypart - b->keypart);
7826 // Place const values before other ones
7827 if ((res= MY_TEST((a->used_tables & ~OUTER_REF_TABLE_BIT)) -
7828 MY_TEST((b->used_tables & ~OUTER_REF_TABLE_BIT))))
7829 return res;
7830 /* Place rows that are not 'OPTIMIZE_REF_OR_NULL' first */
7831 return (int) ((a->optimize & KEY_OPTIMIZE_REF_OR_NULL) -
7832 (b->optimize & KEY_OPTIMIZE_REF_OR_NULL));
7833 }
7834
7835
7836 /*
7837 Add to Key_field array all 'ref' access candidates within nested join.
7838
7839 This function populates Key_field array with entries generated from the
7840 ON condition of the given nested join, and does the same for nested joins
7841 contained within this nested join.
7842
7843 @param[in] nested_join_table Nested join pseudo-table to process
7844 @param[in,out] end End of the key field array
7845 @param[in,out] and_level And-level
7846 @param[in,out] sargables Array of found sargable candidates
7847
7848
7849 @note
7850 We can add accesses to the tables that are direct children of this nested
7851 join (1), and are not inner tables w.r.t their neighbours (2).
7852
7853 Example for #1 (outer brackets pair denotes nested join this function is
7854 invoked for):
7855 @code
7856 ... LEFT JOIN (t1 LEFT JOIN (t2 ... ) ) ON cond
7857 @endcode
7858 Example for #2:
7859 @code
7860 ... LEFT JOIN (t1 LEFT JOIN t2 ) ON cond
7861 @endcode
7862 In examples 1-2 for condition cond, we can add 'ref' access candidates to
7863 t1 only.
7864 Example #3:
7865 @code
7866 ... LEFT JOIN (t1, t2 LEFT JOIN t3 ON inner_cond) ON cond
7867 @endcode
7868 Here we can add 'ref' access candidates for t1 and t2, but not for t3.
7869 */
7870
add_key_fields_for_nj(JOIN * join,TABLE_LIST * nested_join_table,Key_field ** end,uint * and_level,SARGABLE_PARAM ** sargables)7871 static void add_key_fields_for_nj(JOIN *join, TABLE_LIST *nested_join_table,
7872 Key_field **end, uint *and_level,
7873 SARGABLE_PARAM **sargables)
7874 {
7875 List_iterator<TABLE_LIST> li(nested_join_table->nested_join->join_list);
7876 List_iterator<TABLE_LIST> li2(nested_join_table->nested_join->join_list);
7877 bool have_another = FALSE;
7878 table_map tables= 0;
7879 TABLE_LIST *table;
7880 assert(nested_join_table->nested_join);
7881
7882 while ((table= li++) || (have_another && (li=li2, have_another=FALSE,
7883 (table= li++))))
7884 {
7885 if (table->nested_join)
7886 {
7887 if (!table->join_cond_optim())
7888 {
7889 /* It's a semi-join nest. Walk into it as if it wasn't a nest */
7890 have_another= TRUE;
7891 li2= li;
7892 li= List_iterator<TABLE_LIST>(table->nested_join->join_list);
7893 }
7894 else
7895 add_key_fields_for_nj(join, table, end, and_level, sargables);
7896 }
7897 else
7898 if (!table->join_cond_optim())
7899 tables|= table->map();
7900 }
7901 if (nested_join_table->join_cond_optim())
7902 add_key_fields(join, end, and_level, nested_join_table->join_cond_optim(),
7903 tables, sargables);
7904 }
7905
7906
7907 /// @} (end of group RefOptimizerModule)
7908
7909
7910 /**
7911 Check for the presence of AGGFN(DISTINCT a) queries that may be subject
7912 to loose index scan.
7913
7914
7915 Check if the query is a subject to AGGFN(DISTINCT) using loose index scan
7916 (QUICK_GROUP_MIN_MAX_SELECT).
7917 Optionally (if out_args is supplied) will push the arguments of
7918 AGGFN(DISTINCT) to the list
7919
7920 Check for every COUNT(DISTINCT), AVG(DISTINCT) or
7921 SUM(DISTINCT). These can be resolved by Loose Index Scan as long
7922 as all the aggregate distinct functions refer to the same
7923 fields. Thus:
7924
7925 SELECT AGGFN(DISTINCT a, b), AGGFN(DISTINCT b, a)... => can use LIS
7926 SELECT AGGFN(DISTINCT a), AGGFN(DISTINCT a) ... => can use LIS
7927 SELECT AGGFN(DISTINCT a, b), AGGFN(DISTINCT a) ... => cannot use LIS
7928 SELECT AGGFN(DISTINCT a), AGGFN(DISTINCT b) ... => cannot use LIS
7929 etc.
7930
7931 @param join the join to check
7932 @param[out] out_args Collect the arguments of the aggregate functions
7933 to a list. We don't worry about duplicates as
7934 these will be sorted out later in
7935 get_best_group_min_max.
7936
7937 @return does the query qualify for indexed AGGFN(DISTINCT)
7938 @retval true it does
7939 @retval false AGGFN(DISTINCT) must apply distinct in it.
7940 */
7941
7942 bool
is_indexed_agg_distinct(JOIN * join,List<Item_field> * out_args)7943 is_indexed_agg_distinct(JOIN *join, List<Item_field> *out_args)
7944 {
7945 Item_sum **sum_item_ptr;
7946 bool result= false;
7947 Field_map first_aggdistinct_fields;
7948
7949 if (join->primary_tables > 1 || /* reference more than 1 table */
7950 join->select_distinct || /* or a DISTINCT */
7951 join->select_lex->olap == ROLLUP_TYPE) /* Check (B3) for ROLLUP */
7952 return false;
7953
7954 if (join->make_sum_func_list(join->all_fields, join->fields_list, true))
7955 return false;
7956
7957 for (sum_item_ptr= join->sum_funcs; *sum_item_ptr; sum_item_ptr++)
7958 {
7959 Item_sum *sum_item= *sum_item_ptr;
7960 Field_map cur_aggdistinct_fields;
7961 Item *expr;
7962 /* aggregate is not AGGFN(DISTINCT) or more than 1 argument to it */
7963 switch (sum_item->sum_func())
7964 {
7965 case Item_sum::MIN_FUNC:
7966 case Item_sum::MAX_FUNC:
7967 continue;
7968 case Item_sum::COUNT_DISTINCT_FUNC:
7969 break;
7970 case Item_sum::AVG_DISTINCT_FUNC:
7971 case Item_sum::SUM_DISTINCT_FUNC:
7972 if (sum_item->get_arg_count() == 1)
7973 break;
7974 /* fall through */
7975 default: return false;
7976 }
7977
7978 for (uint i= 0; i < sum_item->get_arg_count(); i++)
7979 {
7980 expr= sum_item->get_arg(i);
7981 /* The AGGFN(DISTINCT) arg is not an attribute? */
7982 if (expr->real_item()->type() != Item::FIELD_ITEM)
7983 return false;
7984
7985 Item_field* item= static_cast<Item_field*>(expr->real_item());
7986 if (out_args)
7987 out_args->push_back(item);
7988
7989 cur_aggdistinct_fields.set_bit(item->field->field_index);
7990 result= true;
7991 }
7992 /*
7993 If there are multiple aggregate functions, make sure that they all
7994 refer to exactly the same set of columns.
7995 */
7996 if (first_aggdistinct_fields.is_clear_all())
7997 first_aggdistinct_fields.merge(cur_aggdistinct_fields);
7998 else if (first_aggdistinct_fields != cur_aggdistinct_fields)
7999 return false;
8000 }
8001
8002 return result;
8003 }
8004
8005
8006 /**
8007 Print keys that were appended to join_tab->const_keys because they
8008 can be used for GROUP BY or DISTINCT to the optimizer trace.
8009
8010 @param trace The optimizer trace context we're adding info to
8011 @param join_tab The table the indexes cover
8012 @param new_keys The keys that are considered useful because they can
8013 be used for GROUP BY or DISTINCT
8014 @param cause Zero-terminated string with reason for adding indexes
8015 to const_keys
8016
8017 @see add_group_and_distinct_keys()
8018 */
trace_indexes_added_group_distinct(Opt_trace_context * trace,const JOIN_TAB * join_tab,const key_map new_keys,const char * cause)8019 static void trace_indexes_added_group_distinct(Opt_trace_context *trace,
8020 const JOIN_TAB *join_tab,
8021 const key_map new_keys,
8022 const char* cause)
8023 {
8024 #ifdef OPTIMIZER_TRACE
8025 if (likely(!trace->is_started()))
8026 return;
8027
8028 KEY *key_info= join_tab->table()->key_info;
8029 key_map existing_keys= join_tab->const_keys;
8030 uint nbrkeys= join_tab->table()->s->keys;
8031
8032 Opt_trace_object trace_summary(trace, "const_keys_added");
8033 {
8034 Opt_trace_array trace_key(trace,"keys");
8035 for (uint j= 0 ; j < nbrkeys ; j++)
8036 if (new_keys.is_set(j) && !existing_keys.is_set(j))
8037 trace_key.add_utf8(key_info[j].name);
8038 }
8039 trace_summary.add_alnum("cause", cause);
8040 #endif
8041 }
8042
8043
8044 /**
8045 Discover the indexes that might be used for GROUP BY or DISTINCT queries.
8046
8047 If the query has a GROUP BY clause, find all indexes that contain
8048 all GROUP BY fields, and add those indexes to join_tab->const_keys
8049 and join_tab->keys.
8050
8051 If the query has a DISTINCT clause, find all indexes that contain
8052 all SELECT fields, and add those indexes to join_tab->const_keys and
8053 join_tab->keys. This allows later on such queries to be processed by
8054 a QUICK_GROUP_MIN_MAX_SELECT.
8055
8056 Note that indexes that are not usable for resolving GROUP
8057 BY/DISTINCT may also be added in some corner cases. For example, an
8058 index covering 'a' and 'b' is not usable for the following query but
8059 is still added: "SELECT DISTINCT a+b FROM t1". This is not a big
8060 issue because a) although the optimizer will consider using the
8061 index, it will not chose it (so minor calculation cost added but not
8062 wrong result) and b) it applies only to corner cases.
8063
8064 @param join
8065 @param join_tab
8066
8067 @return
8068 None
8069 */
8070
8071 static void
add_group_and_distinct_keys(JOIN * join,JOIN_TAB * join_tab)8072 add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab)
8073 {
8074 assert(join_tab->const_keys.is_subset(join_tab->keys()));
8075
8076 List<Item_field> indexed_fields;
8077 List_iterator<Item_field> indexed_fields_it(indexed_fields);
8078 ORDER *cur_group;
8079 Item_field *cur_item;
8080 const char *cause;
8081
8082 if (join->group_list)
8083 { /* Collect all query fields referenced in the GROUP clause. */
8084 for (cur_group= join->group_list; cur_group; cur_group= cur_group->next)
8085 (*cur_group->item)->walk(&Item::collect_item_field_processor,
8086 Item::WALK_POSTFIX,
8087 (uchar*) &indexed_fields);
8088 cause= "group_by";
8089 }
8090 else if (join->select_distinct)
8091 { /* Collect all query fields referenced in the SELECT clause. */
8092 List<Item> &select_items= join->fields_list;
8093 List_iterator<Item> select_items_it(select_items);
8094 Item *item;
8095 while ((item= select_items_it++))
8096 item->walk(&Item::collect_item_field_processor,
8097 Item::WALK_POSTFIX,
8098 (uchar*) &indexed_fields);
8099 cause= "distinct";
8100 }
8101 else if (join->tmp_table_param.sum_func_count &&
8102 is_indexed_agg_distinct(join, &indexed_fields))
8103 {
8104 /*
8105 SELECT list with AGGFN(distinct col). The query qualifies for
8106 loose index scan, and is_indexed_agg_distinct() has already
8107 collected all referenced fields into indexed_fields.
8108 */
8109 join->sort_and_group= 1;
8110 cause= "indexed_distinct_aggregate";
8111 }
8112 else
8113 return;
8114
8115 if (indexed_fields.elements == 0)
8116 return;
8117
8118 key_map possible_keys;
8119 possible_keys.set_all();
8120
8121 /* Intersect the keys of all group fields. */
8122 while ((cur_item= indexed_fields_it++))
8123 {
8124 if (cur_item->used_tables() != join_tab->table_ref->map())
8125 {
8126 /*
8127 Doing GROUP BY or DISTINCT on a field in another table so no
8128 index in this table is usable
8129 */
8130 return;
8131 }
8132 else
8133 possible_keys.intersect(cur_item->field->part_of_key);
8134 }
8135
8136 /*
8137 At this point, possible_keys has key bits set only for usable
8138 indexes because indexed_fields is non-empty and if any of the
8139 fields belong to a different table the function would exit in the
8140 loop above.
8141 */
8142
8143 if (!possible_keys.is_clear_all() &&
8144 !possible_keys.is_subset(join_tab->const_keys))
8145 {
8146 trace_indexes_added_group_distinct(&join->thd->opt_trace, join_tab,
8147 possible_keys, cause);
8148 join_tab->const_keys.merge(possible_keys);
8149 join_tab->keys().merge(possible_keys);
8150 }
8151
8152 assert(join_tab->const_keys.is_subset(join_tab->keys()));
8153 }
8154
/**
  Update keyuse array with all possible keys we can use to fetch rows.

  The function collects Key_field candidates from the WHERE condition, from
  the join conditions of the individual tables and from the ON conditions of
  nested joins, converts them into Key_use elements, adds full-text
  candidates, and finally sorts the array and removes unusable key parts.

  @param       thd            Thread handle; used for memory allocation and
                              for the optimizer trace
  @param[out]  keyuse         Put here ordered array of Key_use structures
  @param       join_tab       Array in table number order
  @param       tables         Number of tables in join
  @param       cond           WHERE condition (note that the function analyzes
                              join_tab[i]->join_cond() too)
  @param       cond_equal     Not referenced in the body of this function
  @param       normal_tables  Tables not inner w.r.t some outer join (ones
                              for which we can make ref access based the WHERE
                              clause)
  @param       select_lex     current SELECT
  @param[out]  sargables      Array of found sargable candidates

  @retval
    0  OK
  @retval
    1  Out of memory.
*/

static bool
update_ref_and_keys(THD *thd, Key_use_array *keyuse,JOIN_TAB *join_tab,
                    uint tables, Item *cond, COND_EQUAL *cond_equal,
                    table_map normal_tables, SELECT_LEX *select_lex,
                    SARGABLE_PARAM **sargables)
{
  uint and_level,i,found_eq_constant;
  Key_field *key_fields, *end, *field;
  size_t sz;
  uint m= max(select_lex->max_equal_elems, 1U);
  JOIN *const join= select_lex->join;
  /*
    We use the same piece of memory to store both Key_field
    and SARGABLE_PARAM structures.
    Key_field values are placed at the beginning of this memory
    while SARGABLE_PARAM values are put at the end.
    All predicates that are used to fill arrays of Key_field
    and SARGABLE_PARAM structures have at most 2 arguments
    except BETWEEN predicates that have 3 arguments and
    IN predicates.
    Thus any predicate, if it's not BETWEEN/IN, can be used
    directly to fill at most 2 array elements, either of Key_field
    or SARGABLE_PARAM type. For a BETWEEN predicate 3 elements
    can be filled as this predicate is considered as
    sargable with respect to each of its arguments.
    An IN predicate can require at most 1 element as currently
    it is considered as sargable only for its first argument.
    Multiple equality can add elements that are filled after
    substitution of field arguments by equal fields. There
    can be not more than select_lex->max_equal_elems such
    substitutions.
  */
  sz= max(sizeof(Key_field), sizeof(SARGABLE_PARAM)) *
    (((select_lex->cond_count + 1) * 2 +
      select_lex->between_count) * m + 1);
  if (!(key_fields=(Key_field*) thd->alloc(sz)))
    return TRUE; /* purecov: inspected */
  and_level= 0;
  /* 'field' marks the first Key_field, 'end' the position after the last. */
  field= end= key_fields;
  /* Point *sargables at a SARGABLE_PARAM slot near the end of the buffer. */
  *sargables= (SARGABLE_PARAM *) key_fields +
    (sz - sizeof((*sargables)[0].field))/sizeof(SARGABLE_PARAM);
  /* set a barrier for the array of SARGABLE_PARAM */
  (*sargables)[0].field= 0;

  if (cond)
  {
    add_key_fields(join, &end, &and_level, cond, normal_tables, sargables);
    for (Key_field *fld= field; fld != end ; fld++)
    {
      /* Mark that we can optimize LEFT JOIN */
      if (fld->val->type() == Item::NULL_ITEM &&
          !fld->item_field->field->real_maybe_null())
      {
        /*
          Example:
          SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.a WHERE t2.a IS NULL;
          this just wants rows of t1 where t1.a does not exist in t2.
        */
        fld->item_field->field->table->reginfo.not_exists_optimize= true;
      }
    }
  }

  for (i=0 ; i < tables ; i++)
  {
    /*
      Block the creation of keys for inner tables of outer joins.
      Here only the outer joins that can not be converted to
      inner joins are left and all nests that can be eliminated
      are flattened.
      In the future when we introduce conditional accesses
      for inner tables in outer joins these keys will be taken
      into account as well.
    */
    if (join_tab[i].join_cond())
      add_key_fields(join, &end, &and_level,
                     join_tab[i].join_cond(),
                     join_tab[i].table_ref->map(), sargables);
  }

  /* Process ON conditions for the nested joins */
  {
    List_iterator<TABLE_LIST> li(select_lex->top_join_list);
    TABLE_LIST *tl;
    while ((tl= li++))
    {
      if (tl->nested_join)
        add_key_fields_for_nj(join, tl, &end, &and_level, sargables);
    }
  }

  /* Generate keys descriptions for derived tables */
  if (select_lex->materialized_derived_table_count)
  {
    if (join->generate_derived_keys())
      return true;
  }
  /* fill keyuse with found key parts */
  for ( ; field != end ; field++)
  {
    if (add_key_part(keyuse,field))
      return true;
  }

  /* Add full-text candidates found in the WHERE condition, if any. */
  if (select_lex->ftfunc_list->elements)
  {
    if (add_ft_keys(keyuse, join_tab, cond, normal_tables, true))
      return true;
  }

  /*
    Sort the array of possible keys and remove the following key parts:
    - ref if there is a keypart which is a ref and a const.
      (e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d,
      then we skip the key part corresponding to b=t2.d)
    - keyparts without previous keyparts
      (e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is
      used in the query, we drop the partial key parts from consideration).
    Special treatment for ft-keys.
  */
  if (!keyuse->empty())
  {
    Key_use *save_pos, *use;

    my_qsort(keyuse->begin(), keyuse->size(), keyuse->element_size(),
             reinterpret_cast<qsort_cmp>(sort_keyuse));

    /* Terminating element appended after the sorted candidates. */
    const Key_use key_end(NULL, NULL, 0, 0, 0, 0, 0, 0, false, NULL, 0);
    if (keyuse->push_back(key_end)) // added for easy testing
      return TRUE;

    /*
      Compact the array in place: 'use' reads every candidate (the loop
      bound excludes the terminator), 'save_pos' is where the next kept
      candidate is written, and 'prev' is the last candidate that was kept.
    */
    use= save_pos= keyuse->begin();
    const Key_use *prev= &key_end;
    found_eq_constant=0;
    for (i=0 ; i < keyuse->size()-1 ; i++,use++)
    {
      TABLE *const table= use->table_ref->table;
      /* A value using no tables that is not ref-or-null: const key part. */
      if (!use->used_tables && use->optimize != KEY_OPTIMIZE_REF_OR_NULL)
        table->const_key_parts[use->key]|= use->keypart_map;
      if (use->keypart != FT_KEYPART)
      {
        if (use->key == prev->key && use->table_ref == prev->table_ref)
        {
          /*
            Same key as the kept predecessor: drop the candidate if it
            leaves a gap in the key parts, or if it repeats a key part
            for which a constant was already kept.
          */
          if (prev->keypart+1 < use->keypart ||
              (prev->keypart == use->keypart && found_eq_constant))
            continue; /* remove */
        }
        else if (use->keypart != 0) // First found must be 0
          continue;
      }

#if defined(__GNUC__) && !MY_GNUC_PREREQ(4,4)
      /*
        Old gcc used a memcpy(), which is undefined if save_pos==use:
        http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19410
        http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39480
      */
      if (save_pos != use)
#endif
        *save_pos= *use;
      prev=use;
      found_eq_constant= !use->used_tables;
      /* Save ptr to first use */
      if (!table->reginfo.join_tab->keyuse())
        table->reginfo.join_tab->set_keyuse(save_pos);
      table->reginfo.join_tab->checked_keys.set_bit(use->key);
      save_pos++;
    }
    /*
      'i' becomes the number of kept candidates. The terminator is written
      at index i and the array is then chopped to i.
      NOTE(review): presumably chop(i) leaves the terminator just past the
      logical end of the array -- confirm against Key_use_array::chop().
    */
    i= (uint) (save_pos - keyuse->begin());
    keyuse->at(i) = key_end;
    keyuse->chop(i);
  }
  print_keyuse_array(&thd->opt_trace, keyuse);

  return false;
}
8352
8353
8354 /**
8355 Create a keyuse array for a table with a primary key.
8356 To be used when creating a materialized temporary table.
8357
8358 @param thd THD pointer, for memory allocation
8359 @param table Table object representing table
8360 @param keyparts Number of key parts in the primary key
8361 @param outer_exprs List of items used for key lookup
8362
8363 @return Pointer to created keyuse array, or NULL if error
8364 */
create_keyuse_for_table(THD * thd,TABLE * table,uint keyparts,Item_field ** fields,List<Item> outer_exprs)8365 Key_use_array *create_keyuse_for_table(THD *thd, TABLE *table, uint keyparts,
8366 Item_field **fields,
8367 List<Item> outer_exprs)
8368 {
8369 void *mem= thd->alloc(sizeof(Key_use_array));
8370 if (!mem)
8371 return NULL;
8372 Key_use_array *keyuses= new (mem) Key_use_array(thd->mem_root);
8373
8374 List_iterator<Item> outer_expr(outer_exprs);
8375
8376 for (uint keypartno= 0; keypartno < keyparts; keypartno++)
8377 {
8378 Item *const item= outer_expr++;
8379 Key_field key_field(fields[keypartno], item, 0, 0, true,
8380 // null_rejecting must be true for field items only,
8381 // add_not_null_conds() is incapable of handling
8382 // other item types.
8383 (item->type() == Item::FIELD_ITEM),
8384 NULL, UINT_MAX);
8385 if (add_key_part(keyuses, &key_field))
8386 return NULL;
8387 }
8388 const Key_use key_end(NULL, NULL, 0, 0, 0, 0, 0, 0, false, NULL, 0);
8389 if (keyuses->push_back(key_end)) // added for easy testing
8390 return NULL;
8391
8392 return keyuses;
8393 }
8394
8395
8396 /**
8397 Move const tables first in the position array.
8398
8399 Increment the number of const tables and set same basic properties for the
8400 const table.
8401 A const table looked up by a key has type JT_CONST.
8402 A const table with a single row has type JT_SYSTEM.
8403
8404 @param tab Table that is designated as a const table
8405 @param key The key definition to use for this table (NULL if table scan)
8406 */
8407
mark_const_table(JOIN_TAB * tab,Key_use * key)8408 void JOIN::mark_const_table(JOIN_TAB *tab, Key_use *key)
8409 {
8410 POSITION *const position= positions + const_tables;
8411 position->table= tab;
8412 position->key= key;
8413 position->rows_fetched= 1.0; // This is a const table
8414 position->filter_effect= 1.0;
8415 position->prefix_rowcount= 1.0;
8416 position->read_cost= 0.0;
8417 position->ref_depend_map= 0;
8418 position->loosescan_key= MAX_KEY; // Not a LooseScan
8419 position->sj_strategy= SJ_OPT_NONE;
8420 positions->use_join_buffer= false;
8421
8422 // Move the const table as far down as possible in best_ref
8423 JOIN_TAB **pos= best_ref + const_tables + 1;
8424 for (JOIN_TAB *next= best_ref[const_tables]; next != tab; pos++)
8425 {
8426 JOIN_TAB *const tmp= pos[0];
8427 pos[0]= next;
8428 next= tmp;
8429 }
8430 best_ref[const_tables]= tab;
8431
8432 tab->set_type(key ? JT_CONST : JT_SYSTEM);
8433
8434 const_table_map|= tab->table_ref->map();
8435
8436 const_tables++;
8437 }
8438
8439
make_outerjoin_info()8440 void JOIN::make_outerjoin_info()
8441 {
8442 DBUG_ENTER("JOIN::make_outerjoin_info");
8443
8444 assert(select_lex->outer_join);
8445 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
8446
8447 select_lex->reset_nj_counters();
8448
8449 for (uint i= const_tables; i < tables; ++i)
8450 {
8451 JOIN_TAB *const tab= best_ref[i];
8452 TABLE *const table= tab->table();
8453 if (!table)
8454 continue;
8455
8456 TABLE_LIST *const tbl= tab->table_ref;
8457
8458 if (tbl->outer_join)
8459 {
8460 /*
8461 Table tab is the only one inner table for outer join.
8462 (Like table t4 for the table reference t3 LEFT JOIN t4 ON t3.a=t4.a
8463 is in the query above.)
8464 */
8465 tab->set_last_inner(i);
8466 tab->set_first_inner(i);
8467 tab->init_join_cond_ref(tbl);
8468 tab->cond_equal= tbl->cond_equal;
8469 /*
8470 If this outer join nest is embedded in another join nest,
8471 link the join-tabs:
8472 */
8473 TABLE_LIST *const outer_join_nest= tbl->outer_join_nest();
8474 if (outer_join_nest)
8475 tab->set_first_upper(outer_join_nest->nested_join->first_nested);
8476 }
8477 for (TABLE_LIST *embedding= tbl->embedding;
8478 embedding;
8479 embedding= embedding->embedding)
8480 {
8481 // Ignore join nests that are not outer join nests:
8482 if (!embedding->join_cond_optim())
8483 continue;
8484 NESTED_JOIN *const nested_join= embedding->nested_join;
8485 if (!nested_join->nj_counter)
8486 {
8487 /*
8488 Table tab is the first inner table for nested_join.
8489 Save reference to it in the nested join structure.
8490 */
8491 nested_join->first_nested= i;
8492 tab->init_join_cond_ref(embedding);
8493 tab->cond_equal= tbl->cond_equal;
8494
8495 TABLE_LIST *const outer_join_nest= embedding->outer_join_nest();
8496 if (outer_join_nest)
8497 tab->set_first_upper(outer_join_nest->nested_join->first_nested);
8498 }
8499 if (tab->first_inner() == NO_PLAN_IDX)
8500 tab->set_first_inner(nested_join->first_nested);
8501 if (++nested_join->nj_counter < nested_join->nj_total)
8502 break;
8503 // Table tab is the last inner table for nested join.
8504 best_ref[nested_join->first_nested]->set_last_inner(i);
8505 }
8506 }
8507 DBUG_VOID_RETURN;
8508 }
8509
8510 /**
8511 Build a condition guarded by match variables for embedded outer joins.
8512 When generating a condition for a table as part of an outer join condition
8513 or the WHERE condition, the table in question may also be part of an
8514 embedded outer join. In such cases, the condition must be guarded by
8515 the match variable for this embedded outer join. Such embedded outer joins
8516 may also be recursively embedded in other joins.
8517
8518 The function recursively adds guards for a condition ascending from tab
8519 to root_tab, which is the first inner table of an outer join,
8520 or NULL if the condition being handled is the WHERE clause.
8521
8522 @param idx index of the first inner table for the inner-most outer join
8523 @param cond the predicate to be guarded (must be set)
8524 @param root_idx index of the inner table to stop at
8525 (is NO_PLAN_IDX if this is the WHERE clause)
8526
8527 @return
8528 - pointer to the guarded predicate, if success
8529 - NULL if error
8530 */
8531
8532 static Item*
add_found_match_trig_cond(JOIN * join,plan_idx idx,Item * cond,plan_idx root_idx)8533 add_found_match_trig_cond(JOIN *join, plan_idx idx, Item *cond,
8534 plan_idx root_idx)
8535 {
8536 ASSERT_BEST_REF_IN_JOIN_ORDER(join);
8537 assert(cond);
8538
8539 for ( ; idx != root_idx; idx= join->best_ref[idx]->first_upper())
8540 {
8541 if (!(cond= new Item_func_trig_cond(cond, NULL, join, idx,
8542 Item_func_trig_cond::FOUND_MATCH)))
8543 return NULL;
8544
8545 cond->quick_fix_field();
8546 cond->update_used_tables();
8547 }
8548
8549 return cond;
8550 }
8551
8552
8553 /**
8554 Attach outer join conditions to generated table conditions in an optimal way.
8555
8556 @param last_tab - Last table that has been added to the current plan.
8557 Pre-condition: If this is the last inner table of an outer
8558 join operation, a join condition is attached to the first
8559 inner table of that outer join operation.
8560
8561 @return false if success, true if error.
8562
8563 Outer join conditions are attached to individual tables, but we can analyze
8564 those conditions only when reaching the last inner table of an outer join
8565 operation. Notice also that a table can be last within several outer join
8566 nests, hence the outer for() loop of this function.
8567
8568 Example:
8569 SELECT * FROM t1 LEFT JOIN (t2 LEFT JOIN t3 ON t2.a=t3.a) ON t1.a=t2.a
8570
8571 Table t3 is last both in the join nest (t2 - t3) and in (t1 - (t2 - t3))
8572 Thus, join conditions for both join nests will be evaluated when reaching
8573 this table.
8574
8575 For each outer join operation processed, the join condition is split
8576 optimally over the inner tables of the outer join. The split-out conditions
8577 are later referred to as table conditions (but note that several table
8578 conditions stemming from different join operations may be combined into
8579 a composite table condition).
8580
8581 Example:
8582 Consider the above query once more.
8583 The predicate t1.a=t2.a can be evaluated when rows from t1 and t2 are ready,
8584 ie at table t2. The predicate t2.a=t3.a can be evaluated at table t3.
8585
8586 Each non-constant split-out table condition is guarded by a match variable
8587 that enables it only when a matching row is found for all the embedded
8588 outer join operations.
8589
8590 Each split-out table condition is guarded by a variable that turns the
8591 condition off just before a null-complemented row for the outer join
8592 operation is formed. Thus, the join condition will not be checked for
8593 the null-complemented row.
8594 */
8595
attach_join_conditions(plan_idx last_tab)8596 bool JOIN::attach_join_conditions(plan_idx last_tab)
8597 {
8598 DBUG_ENTER("JOIN::attach_join_conditions");
8599 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
8600
8601 for (plan_idx first_inner= best_ref[last_tab]->first_inner();
8602 first_inner != NO_PLAN_IDX &&
8603 best_ref[first_inner]->last_inner() == last_tab;
8604 first_inner= best_ref[first_inner]->first_upper())
8605 {
8606 /*
8607 Table last_tab is the last inner table of an outer join, locate
8608 the corresponding join condition from the first inner table of the
8609 same outer join:
8610 */
8611 Item *const join_cond= best_ref[first_inner]->join_cond();
8612 assert(join_cond);
8613 /*
8614 Add the constant part of the join condition to the first inner table
8615 of the outer join.
8616 */
8617 Item *cond= make_cond_for_table(join_cond, const_table_map,
8618 (table_map) 0, false);
8619 if (cond)
8620 {
8621 cond= new Item_func_trig_cond(cond, NULL, this, first_inner,
8622 Item_func_trig_cond::IS_NOT_NULL_COMPL);
8623 if (!cond)
8624 DBUG_RETURN(true);
8625 if (cond->fix_fields(thd, NULL))
8626 DBUG_RETURN(true);
8627
8628 if (best_ref[first_inner]->and_with_condition(cond))
8629 DBUG_RETURN(true);
8630 }
8631 /*
8632 Split the non-constant part of the join condition into parts that
8633 can be attached to the inner tables of the outer join.
8634 */
8635 for (plan_idx i= first_inner; i <= last_tab; ++i)
8636 {
8637 table_map prefix_tables= best_ref[i]->prefix_tables();
8638 table_map added_tables= best_ref[i]->added_tables();
8639
8640 /*
8641 When handling the first inner table of an outer join, we may also
8642 reference all tables ahead of this table:
8643 */
8644 if (i == first_inner)
8645 added_tables= prefix_tables;
8646 /*
8647 We need RAND_TABLE_BIT on the last inner table, in case there is a
8648 non-deterministic function in the join condition.
8649 (RAND_TABLE_BIT is set for the last table of the join plan,
8650 but this is not sufficient for join conditions, which may have a
8651 last inner table that is ahead of the last table of the join plan).
8652 */
8653 if (i == last_tab)
8654 {
8655 prefix_tables|= RAND_TABLE_BIT;
8656 added_tables|= RAND_TABLE_BIT;
8657 }
8658 cond= make_cond_for_table(join_cond, prefix_tables, added_tables, false);
8659 if (cond == NULL)
8660 continue;
8661 /*
8662 If the table is part of an outer join that is embedded in the
8663 outer join currently being processed, wrap the condition in
8664 triggered conditions for match variables of such embedded outer joins.
8665 */
8666 if (!(cond= add_found_match_trig_cond(this, best_ref[i]->first_inner(),
8667 cond, first_inner)))
8668 DBUG_RETURN(true);
8669
8670 // Add the guard turning the predicate off for the null-complemented row.
8671 cond= new Item_func_trig_cond(cond, NULL, this, first_inner,
8672 Item_func_trig_cond::IS_NOT_NULL_COMPL);
8673 if (!cond)
8674 DBUG_RETURN(true);
8675 if (cond->fix_fields(thd, NULL))
8676 DBUG_RETURN(true);
8677
8678 // Add the generated condition to the existing table condition
8679 if (best_ref[i]->and_with_condition(cond))
8680 DBUG_RETURN(true);
8681 }
8682 }
8683
8684 DBUG_RETURN(false);
8685 }
8686
8687
/*****************************************************************************
  Remove calculations involving tables that have not been read yet. Also
  remove tests against fields that are read through a key, when the table
  is not an outer join table.
  We can't remove tests that are made against columns which are stored
  in sorted order.
*****************************************************************************/
8695
8696 static Item *
part_of_refkey(TABLE * table,TABLE_REF * ref,Field * field)8697 part_of_refkey(TABLE *table, TABLE_REF *ref, Field *field)
8698 {
8699 uint ref_parts= ref->key_parts;
8700 if (ref_parts)
8701 {
8702 if (ref->has_guarded_conds())
8703 return NULL;
8704
8705 const KEY_PART_INFO *key_part= table->key_info[ref->key].key_part;
8706
8707 for (uint part=0 ; part < ref_parts ; part++,key_part++)
8708 if (field->eq(key_part->field) &&
8709 !(key_part->key_part_flag & HA_PART_KEY_SEG))
8710 return ref->items[part];
8711 }
8712 return NULL;
8713 }
8714
8715
8716 /**
8717 @return
8718 1 if right_item is used removable reference key on left_item
8719
8720 @note see comments in make_cond_for_table_from_pred() about careful
8721 usage/modifications of test_if_ref().
8722 */
8723
test_if_ref(Item * root_cond,Item_field * left_item,Item * right_item)8724 static bool test_if_ref(Item *root_cond,
8725 Item_field *left_item,Item *right_item)
8726 {
8727 if (left_item->depended_from)
8728 return false; // don't even read join_tab of inner subquery!
8729 Field *field=left_item->field;
8730 JOIN_TAB *join_tab= field->table->reginfo.join_tab;
8731 if (join_tab)
8732 ASSERT_BEST_REF_IN_JOIN_ORDER(join_tab->join());
8733 // No need to change const test
8734 if (!field->table->const_table && join_tab &&
8735 (join_tab->first_inner() == NO_PLAN_IDX ||
8736 join_tab->join()->best_ref[join_tab->first_inner()]->join_cond() == root_cond) &&
8737 /* "ref_or_null" implements "x=y or x is null", not "x=y" */
8738 (join_tab->type() != JT_REF_OR_NULL))
8739 {
8740 Item *ref_item= part_of_refkey(field->table, &join_tab->ref(), field);
8741 if (ref_item && ref_item->eq(right_item,1))
8742 {
8743 right_item= right_item->real_item();
8744 if (right_item->type() == Item::FIELD_ITEM)
8745 return (field->eq_def(((Item_field *) right_item)->field));
8746 /* remove equalities injected by IN->EXISTS transformation */
8747 else if (right_item->type() == Item::CACHE_ITEM)
8748 return ((Item_cache *)right_item)->eq_def (field);
8749 if (right_item->const_item() && !(right_item->is_null()))
8750 {
8751 /*
8752 We can remove all fields except:
8753 1. String data types:
8754 - For BINARY/VARBINARY fields with equality against a
8755 string: Ref access can return more rows than match the
8756 string. The reason seems to be that the string constant
8757 is not "padded" to the full length of the field when
8758 setting up ref access. @todo Change how ref access for
8759 BINARY/VARBINARY fields are done so that only qualifying
8760 rows are returned from the storage engine.
8761 2. Float data type: Comparison of float can differ
8762 - When we search "WHERE field=value" using an index,
8763 the "value" side is converted from double to float by
8764 Field_float::store(), then two floats are compared.
8765 - When we search "WHERE field=value" without indexes,
8766 the "field" side is converted from float to double by
8767 Field_float::val_real(), then two doubles are compared.
8768 Note about string data types: All currently existing
8769 collations have "PAD SPACE" style. If we introduce "NO PAD"
8770 collations this function must return false for such
8771 collations, because trailing space compression for indexes
8772 makes the table value and the index value not equal to each
8773 other in "NO PAD" collations. As index lookup strips
8774 trailing spaces, it can return false candidates. Further
8775 comparison of the actual table values is required.
8776 */
8777 if (!((field->type() == MYSQL_TYPE_STRING || // 1
8778 field->type() == MYSQL_TYPE_VARCHAR) && field->binary()) &&
8779 !(field->type() == MYSQL_TYPE_FLOAT && field->decimals() > 0)) // 2
8780 {
8781 return !right_item->save_in_field_no_warnings(field, true);
8782 }
8783 }
8784 }
8785 }
8786 return 0; // keep test
8787 }
8788
8789
8790 /*
8791 Remove the predicates pushed down into the subquery
8792
8793 DESCRIPTION
8794 Given that this join will be executed using (unique|index)_subquery,
8795 without "checking NULL", remove the predicates that were pushed down
8796 into the subquery.
8797
8798 If the subquery compares scalar values, we can remove the condition that
8799 was wrapped into trig_cond (it will be checked when needed by the subquery
8800 engine)
8801
8802 If the subquery compares row values, we need to keep the wrapped
8803 equalities in the WHERE clause: when the left (outer) tuple has both NULL
8804 and non-NULL values, we'll do a full table scan and will rely on the
8805 equalities corresponding to non-NULL parts of left tuple to filter out
8806 non-matching records.
8807
8808 If '*where' is a triggered condition, or contains 'OR x IS NULL', or
8809 contains a condition coming from the original subquery's WHERE clause, or
8810 if there are more than one outer expressions, then WHERE is not of the
8811 simple form:
8812 outer_expr = inner_expr
8813 and thus this function does nothing.
8814
8815 If the index is on prefix (=> test_if_ref() is false), then the equality
8816 is needed as post-filter, so this function does nothing.
8817
8818 TODO: We can remove the equalities that will be guaranteed to be true by the
8819 fact that subquery engine will be using index lookup. This must be done only
8820 for cases where there are no conversion errors of significance, e.g. 257
8821 that is searched in a byte. But this requires homogenization of the return
8822 codes of all Field*::store() methods.
8823 */
remove_subq_pushed_predicates()8824 void JOIN::remove_subq_pushed_predicates()
8825 {
8826 if (where_cond->type() != Item::FUNC_ITEM)
8827 return;
8828 Item_func *const func= static_cast<Item_func *>(where_cond);
8829 if (func->functype() == Item_func::EQ_FUNC &&
8830 func->arguments()[0]->type() == Item::REF_ITEM &&
8831 func->arguments()[1]->type() == Item::FIELD_ITEM &&
8832 test_if_ref(func,
8833 static_cast<Item_field *>(func->arguments()[1]),
8834 func->arguments()[0]))
8835 {
8836 where_cond= NULL;
8837 return;
8838 }
8839 }
8840
8841
8842 /**
8843 @brief
8844 Add keys to derived tables'/views' result tables in a list
8845
8846 @param select_lex generate derived keys for select_lex's derived tables
8847
8848 @details
8849 This function generates keys for all derived tables/views of the select_lex
8850 to which this join corresponds to with help of the TABLE_LIST:generate_keys
8851 function.
8852
8853 @return FALSE all keys were successfully added.
8854 @return TRUE OOM error
8855 */
8856
generate_derived_keys()8857 bool JOIN::generate_derived_keys()
8858 {
8859 assert(select_lex->materialized_derived_table_count);
8860
8861 for (TABLE_LIST *table= select_lex->leaf_tables;
8862 table;
8863 table= table->next_leaf)
8864 {
8865 table->derived_keys_ready= TRUE;
8866 /* Process tables that aren't materialized yet. */
8867 if (table->uses_materialization() && !table->table->is_created() &&
8868 table->generate_keys())
8869 return TRUE;
8870 }
8871 return FALSE;
8872 }
8873
8874
8875 /**
8876 @brief
8877 Drop unused keys for each materialized derived table/view
8878
8879 @details
8880 For each materialized derived table/view, call TABLE::use_index to save one
8881 index chosen by the optimizer and ignore others. If no key is chosen, then all
8882 keys will be ignored.
8883 */
8884
drop_unused_derived_keys()8885 void JOIN::drop_unused_derived_keys()
8886 {
8887 assert(select_lex->materialized_derived_table_count);
8888 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
8889
8890 for (uint i= 0 ; i < tables ; i++)
8891 {
8892 JOIN_TAB *tab= best_ref[i];
8893 TABLE *table= tab->table();
8894 /*
8895 Save chosen key description if:
8896 1) it's a materialized derived table
8897 2) it's not yet instantiated
8898 3) some keys are defined for it
8899 */
8900 if (table &&
8901 tab->table_ref->uses_materialization() && // (1)
8902 !table->is_created() && // (2)
8903 table->max_keys > 0) // (3)
8904 {
8905 Key_use *keyuse= tab->position()->key;
8906
8907 table->use_index(keyuse ? keyuse->key : -1);
8908
8909 const bool key_is_const= keyuse && tab->const_keys.is_set(keyuse->key);
8910 tab->const_keys.clear_all();
8911 tab->keys().clear_all();
8912
8913 if (!keyuse)
8914 continue;
8915
8916 /*
8917 Update the selected "keyuse" to point to key number 0.
8918 Notice that unused keyuse entries still point to the deleted
8919 candidate keys. tab->keys (and tab->const_keys if the chosen key
8920 is constant) should reference key object no. 0 as well.
8921 */
8922 tab->keys().set_bit(0);
8923 if (key_is_const)
8924 tab->const_keys.set_bit(0);
8925
8926 const uint oldkey= keyuse->key;
8927 for (; keyuse->table_ref == tab->table_ref && keyuse->key == oldkey;
8928 keyuse++)
8929 keyuse->key= 0;
8930 }
8931 }
8932 }
8933
8934
8935 /**
8936 Cache constant expressions in WHERE, HAVING, ON conditions.
8937
8938 @return False if success, True if error
8939
8940 @note This function is run after conditions have been pushed down to
8941 individual tables, so transformation is applied to JOIN_TAB::condition
8942 and not to the WHERE condition.
8943 */
8944
cache_const_exprs()8945 bool JOIN::cache_const_exprs()
8946 {
8947 /* No need in cache if all tables are constant. */
8948 assert(!plan_is_const());
8949 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
8950
8951 for (uint i= const_tables; i < tables; i++)
8952 {
8953 Item *condition= best_ref[i]->condition();
8954 if (condition == NULL)
8955 continue;
8956 Item *cache_item= NULL;
8957 Item **analyzer_arg= &cache_item;
8958 condition=
8959 condition->compile(&Item::cache_const_expr_analyzer,
8960 (uchar **)&analyzer_arg,
8961 &Item::cache_const_expr_transformer,
8962 (uchar *)&cache_item);
8963 if (condition == NULL)
8964 return true;
8965 best_ref[i]->set_condition(condition);
8966 }
8967 if (having_cond)
8968 {
8969 Item *cache_item= NULL;
8970 Item **analyzer_arg= &cache_item;
8971 having_cond= having_cond->compile(&Item::cache_const_expr_analyzer,
8972 (uchar **)&analyzer_arg,
8973 &Item::cache_const_expr_transformer,
8974 (uchar *)&cache_item);
8975 if (having_cond == NULL)
8976 return true;
8977 }
8978 return false;
8979 }
8980
8981
8982 /**
8983 Extract a condition that can be checked after reading given table
8984
8985 @param cond Condition to analyze
8986 @param tables Tables for which "current field values" are available
8987 @param used_table Table(s) that we are extracting the condition for (may
8988 also include PSEUDO_TABLE_BITS, and may be zero)
8989 @param exclude_expensive_cond Do not push expensive conditions
8990
8991 @retval <>NULL Generated condition
8992 @retval = NULL Already checked, OR error
8993
8994 @details
8995 Extract the condition that can be checked after reading the table(s)
8996 specified in @c used_table, given that current-field values for tables
8997 specified in @c tables bitmap are available.
8998 If @c used_table is 0, extract conditions for all tables in @c tables.
8999
9000 This function can be used to extract conditions relevant for a table
9001 in a join order. Together with its caller, it will ensure that all
9002 conditions are attached to the first table in the join order where all
9003 necessary fields are available, and it will also ensure that a given
9004 condition is attached to only one table.
9005 To accomplish this, first initialize @c tables to the empty
9006 set. Then, loop over all tables in the join order, set @c used_table to
9007 the bit representing the current table, accumulate @c used_table into the
9008 @c tables set, and call this function. To ensure correct handling of
9009 const expressions and outer references, add the const table map and
9010 OUTER_REF_TABLE_BIT to @c used_table for the first table. To ensure
9011 that random expressions are evaluated for the final table, add
9012 RAND_TABLE_BIT to @c used_table for the final table.
9013
9014 The function assumes that constant, inexpensive parts of the condition
9015 have already been checked. Constant, expensive parts will be attached
9016 to the first table in the join order, provided that the above call
9017 sequence is followed.
9018
9019 The call order will ensure that conditions covering tables in @c tables
9020 minus those in @c used_table, have already been checked.
9021
9022 The function takes into account that some parts of the condition are
9023 guaranteed to be true by employed 'ref' access methods (the code that
9024 does this is located at the end, search down for "EQ_FUNC").
9025
9026 @note
9027 make_cond_for_info_schema() uses an algorithm similar to
9028 make_cond_for_table().
9029 */
9030
9031 Item *
make_cond_for_table(Item * cond,table_map tables,table_map used_table,bool exclude_expensive_cond)9032 make_cond_for_table(Item *cond, table_map tables, table_map used_table,
9033 bool exclude_expensive_cond)
9034 {
9035 return make_cond_for_table_from_pred(cond, cond, tables, used_table,
9036 exclude_expensive_cond);
9037 }
9038
9039 static Item *
make_cond_for_table_from_pred(Item * root_cond,Item * cond,table_map tables,table_map used_table,bool exclude_expensive_cond)9040 make_cond_for_table_from_pred(Item *root_cond, Item *cond,
9041 table_map tables, table_map used_table,
9042 bool exclude_expensive_cond)
9043 {
9044 /*
9045 Ignore this condition if
9046 1. We are extracting conditions for a specific table, and
9047 2. that table is not referenced by the condition, but not if
9048 3. this is a constant condition not checked at optimization time and
9049 this is the first table we are extracting conditions for.
9050 (Assuming that used_table == tables for the first table.)
9051 */
9052 if (used_table && // 1
9053 !(cond->used_tables() & used_table) && // 2
9054 !(cond->is_expensive() && used_table == tables)) // 3
9055 return NULL;
9056
9057 if (cond->type() == Item::COND_ITEM)
9058 {
9059 if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
9060 {
9061 /* Create new top level AND item */
9062 Item_cond_and *new_cond= new Item_cond_and;
9063 if (!new_cond)
9064 return NULL;
9065 List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
9066 Item *item;
9067 while ((item= li++))
9068 {
9069 Item *fix= make_cond_for_table_from_pred(root_cond, item,
9070 tables, used_table,
9071 exclude_expensive_cond);
9072 if (fix)
9073 new_cond->argument_list()->push_back(fix);
9074 }
9075 switch (new_cond->argument_list()->elements) {
9076 case 0:
9077 return NULL; // Always true
9078 case 1:
9079 return new_cond->argument_list()->head();
9080 default:
9081 if (new_cond->fix_fields(current_thd, NULL))
9082 return NULL;
9083 return new_cond;
9084 }
9085 }
9086 else
9087 { // Or list
9088 Item_cond_or *new_cond= new Item_cond_or;
9089 if (!new_cond)
9090 return NULL;
9091 List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
9092 Item *item;
9093 while ((item= li++))
9094 {
9095 Item *fix= make_cond_for_table_from_pred(root_cond, item,
9096 tables, 0L,
9097 exclude_expensive_cond);
9098 if (!fix)
9099 return NULL; // Always true
9100 new_cond->argument_list()->push_back(fix);
9101 }
9102 if (new_cond->fix_fields(current_thd, NULL))
9103 return NULL;
9104 return new_cond;
9105 }
9106 }
9107
9108 /*
9109 Omit this condition if
9110 1. It has been marked as omittable before, or
9111 2. Some tables referred by the condition are not available, or
9112 3. We are extracting conditions for all tables, the condition is
9113 considered 'expensive', and we want to delay evaluation of such
9114 conditions to the execution phase.
9115 */
9116 if (cond->marker == 3 || // 1
9117 (cond->used_tables() & ~tables) || // 2
9118 (!used_table && exclude_expensive_cond && cond->is_expensive())) // 3
9119 return NULL;
9120
9121 /*
9122 Extract this condition if
9123 1. It has already been marked as applicable, or
9124 2. It is not a <comparison predicate> (=, <, >, <=, >=, <=>)
9125 */
9126 if (cond->marker == 2 || // 1
9127 cond->eq_cmp_result() == Item::COND_OK) // 2
9128 return cond;
9129
9130 /*
9131 Remove equalities that are guaranteed to be true by use of 'ref' access
9132 method.
9133 Note that ref access implements "table1.field1 <=> table2.indexed_field2",
9134 i.e. if it passed a NULL field1, it will return NULL indexed_field2 if
9135 there are.
9136 Thus the equality "table1.field1 = table2.indexed_field2",
9137 is equivalent to "ref access AND table1.field1 IS NOT NULL"
9138 i.e. "ref access and proper setting/testing of ref->null_rejecting".
9139 Thus, we must be careful, that when we remove equalities below we also
9140 set ref->null_rejecting, and test it at execution; otherwise wrong NULL
9141 matches appear.
9142 So:
9143 - for the optimization phase, the code which is below, and the code in
9144 test_if_ref(), and in add_key_field(), must be kept in sync: if the
9145 applicability conditions in one place are relaxed, they should also be
9146 relaxed elsewhere.
9147 - for the execution phase, all possible execution methods must test
9148 ref->null_rejecting.
9149 */
9150 if (cond->type() == Item::FUNC_ITEM &&
9151 ((Item_func*) cond)->functype() == Item_func::EQ_FUNC)
9152 {
9153 Item *left_item= ((Item_func*) cond)->arguments()[0]->real_item();
9154 Item *right_item= ((Item_func*) cond)->arguments()[1]->real_item();
9155 if ((left_item->type() == Item::FIELD_ITEM &&
9156 test_if_ref(root_cond, (Item_field*) left_item, right_item)) ||
9157 (right_item->type() == Item::FIELD_ITEM &&
9158 test_if_ref(root_cond, (Item_field*) right_item, left_item)))
9159 {
9160 cond->marker= 3; // Condition can be omitted
9161 return NULL;
9162 }
9163 }
9164 cond->marker= 2; // Mark condition as applicable
9165 return cond;
9166 }
9167
9168
9169 /**
9170 Separates the predicates in a join condition and pushes them to the
9171 join step where all involved tables are available in the join prefix.
9172 ON clauses from JOIN expressions are also pushed to the most appropriate step.
9173
9174 @param join Join object where predicates are pushed.
9175
9176 @param cond Pointer to condition which may contain an arbitrary number of
9177 predicates, combined using AND, OR and XOR items.
9178 If NULL, equivalent to a predicate that returns TRUE for all
9179 row combinations.
9180
9181
9182 @retval true Found impossible WHERE clause, or out-of-memory
9183 @retval false Other
9184 */
9185
static bool make_join_select(JOIN *join, Item *cond)
{
  /*
    Overview of the steps performed below:
      1. Add IS NOT NULL conditions to the tables (add_not_null_conds()).
      2. Build the part of 'cond' that depends only on constant tables,
         AND it with the conditions already attached to the const tables
         and evaluate it at once. A false result (or an error) ends the
         function with return value 'true'.
      3. For every non-const table, in best_ref[] order: build the
         condition for that table, attach it, optionally push it to the
         storage engine, and possibly run test_quick_select() again to
         see whether an index-based access should replace the planned
         scan. Then attach join conditions for the table.
      4. Emit the "attached_conditions_summary" trace and walk attached
         conditions that contain subqueries with
         Item::inform_item_in_cond_of_tab.
  */
  THD *thd= join->thd;
  Opt_trace_context * const trace= &thd->opt_trace;
  DBUG_ENTER("make_join_select");
  ASSERT_BEST_REF_IN_JOIN_ORDER(join);

  // Add IS NOT NULL conditions to table conditions:
  add_not_null_conds(join);

  /*
    Extract constant conditions that are part of the WHERE clause.
    Constant parts of join conditions from outer joins are attached to
    the appropriate table condition in JOIN::attach_join_conditions().
  */
  if (cond)                /* Because of QUICK_GROUP_MIN_MAX_SELECT */
  {                        /* there may be a select without a cond. */
    if (join->primary_tables > 1)
      cond->update_used_tables();               // Table number may have changed
    if (join->plan_is_const() &&
        join->select_lex->master_unit() ==
        thd->lex->unit)             // The outer-most query block
      join->const_table_map|= RAND_TABLE_BIT;
  }
  /*
    Extract conditions that depend on constant tables.
    The const part of the query's WHERE clause can be checked immediately
    and if it is not satisfied then the join has empty result
  */
  Item *const_cond= NULL;
  if (cond)
    const_cond= make_cond_for_table(cond, join->const_table_map,
                                    (table_map) 0, true);

  // Add conditions added by add_not_null_conds()
  for (uint i= 0; i < join->const_tables; i++)
  {
    // A 'true' return from and_conditions() is propagated as failure.
    if (and_conditions(&const_cond, join->best_ref[i]->condition()))
      DBUG_RETURN(true);
  }
  DBUG_EXECUTE("where", print_where(const_cond, "constants", QT_ORDINARY););
  if (const_cond != NULL)
  {
    // Evaluate the constant-table condition right away.
    const bool const_cond_result= const_cond->val_int() != 0;
    if (thd->is_error())
      DBUG_RETURN(true);

    Opt_trace_object trace_const_cond(trace);
    trace_const_cond.add("condition_on_constant_tables", const_cond)
                    .add("condition_value", const_cond_result);
    if (!const_cond_result)
    {
      DBUG_PRINT("info",("Found impossible WHERE condition"));
      DBUG_RETURN(true);
    }
  }

  /*
    Extract remaining conditions from WHERE clause and join conditions,
    and attach them to the most appropriate table condition. This means that
    a condition will be evaluated as soon as all fields it depends on are
    available. For outer join conditions, the additional criterion is that
    we must have determined whether outer-joined rows are available, or
    have been NULL-extended, see JOIN::attach_join_conditions() for details.
  */
  {
    Opt_trace_object trace_wrapper(trace);
    Opt_trace_object
      trace_conditions(trace, "attaching_conditions_to_tables");
    trace_conditions.add("original_condition", cond);
    Opt_trace_array
      trace_attached_comp(trace, "attached_conditions_computation");

    for (uint i=join->const_tables ; i < join->tables ; i++)
    {
      JOIN_TAB *const tab= join->best_ref[i];

      // Entries without a position are skipped entirely.
      if (!tab->position())
        continue;
      /*
        first_inner is the X in queries like:
        SELECT * FROM t1 LEFT OUTER JOIN (t2 JOIN t3) ON X
      */
      const plan_idx first_inner= tab->first_inner();
      const table_map used_tables= tab->prefix_tables();
      const table_map current_map= tab->added_tables();
      Item *tmp= NULL;

      if (cond)
        tmp= make_cond_for_table(cond,used_tables,current_map, 0);
      /* Add conditions added by add_not_null_conds(). */
      if (tab->condition() && and_conditions(&tmp, tab->condition()))
        DBUG_RETURN(true);


      if (cond && !tmp && tab->quick())
      {                                         // Outer join
        assert(tab->type() == JT_RANGE || tab->type() == JT_INDEX_MERGE);
        /*
          Hack to handle the case where we only refer to a table
          in the ON part of an OUTER JOIN. In this case we want the code
          below to check if we should use 'quick' instead.
        */
        DBUG_PRINT("info", ("Item_int"));
        /*
          NOTE(review): the result of 'new' is not checked for NULL here,
          unlike other allocations in this file - confirm this is intended.
        */
        tmp= new Item_int((longlong) 1,1);      // Always true
      }
      if (tmp || !cond || tab->type() == JT_REF || tab->type() == JT_REF_OR_NULL ||
          tab->type() == JT_EQ_REF || first_inner != NO_PLAN_IDX)
      {
        DBUG_EXECUTE("where",print_where(tmp,tab->table()->alias, QT_ORDINARY););
        /*
          If tab is an inner table of an outer join operation,
          add a match guard to the pushed down predicate.
          The guard will turn the predicate on only after
          the first match for outer tables is encountered.
        */
        if (cond && tmp)
        {
          /*
            Because of QUICK_GROUP_MIN_MAX_SELECT there may be a select without
            a cond, so neutralize the hack above.
          */
          if (!(tmp= add_found_match_trig_cond(join, first_inner, tmp, NO_PLAN_IDX)))
            DBUG_RETURN(true);
          tab->set_condition(tmp);
          /* Push condition to storage engine if this is enabled
             and the condition is not guarded */
          if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN) &&
              first_inner == NO_PLAN_IDX)
          {
            // Keep only the part of the condition that refers to this table.
            Item *push_cond=
              make_cond_for_table(tmp, tab->table_ref->map(),
                                  tab->table_ref->map(), 0);
            if (push_cond)
            {
              /* Push condition to handler */
              // pushed_cond is recorded only when cond_push() returns NULL/0.
              if (!tab->table()->file->cond_push(push_cond))
                tab->table()->file->pushed_cond= push_cond;
            }
          }
        }
        else
        {
          tab->set_condition(NULL);
        }

        DBUG_EXECUTE("where",print_where(tmp,tab->table()->alias, QT_ORDINARY););

        if (tab->quick())
        {
          if (tab->needed_reg.is_clear_all() && tab->type() != JT_CONST)
          {
            /*
              We keep (for now) the QUICK AM calculated in
              get_quick_record_count().
            */
            assert(tab->quick()->is_valid());
          }
          else
          {
            // Otherwise the previously computed quick select is discarded.
            delete tab->quick();
            tab->set_quick(NULL);
          }
        }

        if ((tab->type() == JT_ALL || tab->type() == JT_RANGE ||
            tab->type() == JT_INDEX_MERGE || tab->type() == JT_INDEX_SCAN) &&
            tab->use_quick != QS_RANGE)
        {
          /*
            We plan to scan (table/index/range scan).
            Check again if we should use an index. We can use an index if:

            1a) There is a condition that range optimizer can work on, and
            1b) There are non-constant conditions on one or more keys, and
            1c) Some of the non-constant fields may have been read
                already. This may be the case if this is not the first
                table in the join OR this is a subselect with
                non-constant conditions referring to an outer table
                (dependent subquery)
                or,
            2a) There are conditions only relying on constants
            2b) This is the first non-constant table
            2c) There is a limit of rows to read that is lower than
                the fanout for this table, predicate filters included
                (i.e., the estimated number of rows that will be
                produced for this table per row combination of
                previous tables)
            2d) The query is NOT run with FOUND_ROWS() (because in that
                case we have to scan through all rows to count them anyway)
          */
          enum { DONT_RECHECK, NOT_FIRST_TABLE, LOW_LIMIT }
          recheck_reason= DONT_RECHECK;

          assert(tab->const_keys.is_subset(tab->keys()));

          /*
            Remember the access method chosen so far, so that a change can
            be detected at the end of this block.
          */
          const join_type orig_join_type= tab->type();
          const QUICK_SELECT_I *const orig_quick= tab->quick();

          if (cond &&                                                // 1a
              (tab->keys() != tab->const_keys) &&                      // 1b
              (i > 0 ||                                              // 1c
               (join->select_lex->master_unit()->item &&
                cond->used_tables() & OUTER_REF_TABLE_BIT)))
            recheck_reason= NOT_FIRST_TABLE;
          else if (!tab->const_keys.is_clear_all() &&                // 2a
                   i == join->const_tables &&                        // 2b
                   (join->unit->select_limit_cnt <
                    (tab->position()->rows_fetched *
                     tab->position()->filter_effect)) &&               // 2c
                   !join->calc_found_rows)                             // 2d
            recheck_reason= LOW_LIMIT;

          if (tab->position()->sj_strategy == SJ_OPT_LOOSE_SCAN)
          {
            /*
              Semijoin loose scan has settled for a certain index-based access
              method with suitable characteristics, don't substitute it.
            */
            recheck_reason= DONT_RECHECK;
          }

          if (recheck_reason != DONT_RECHECK)
          {
            Opt_trace_object trace_one_table(trace);
            trace_one_table.add_utf8_table(tab->table_ref);
            Opt_trace_object trace_table(trace, "rechecking_index_usage");
            if (recheck_reason == NOT_FIRST_TABLE)
              trace_table.add_alnum("recheck_reason", "not_first_table");
            else
              trace_table.add_alnum("recheck_reason", "low_limit").
                add("limit", join->unit->select_limit_cnt).
                add("row_estimate",
                    tab->position()->rows_fetched *
                    tab->position()->filter_effect);

            /* Join with outer join condition */
            /*
              The table condition is temporarily ANDed with the join
              condition; orig_cond is restored further down with
              set_condition(orig_cond).
            */
            Item *orig_cond= tab->condition();
            tab->and_with_condition(tab->join_cond());

            /*
              We can't call sel->cond->fix_fields,
              as it will break tab->join_cond() if it's AND condition
              (fix_fields currently removes extra AND/OR levels).
              Yet attributes of the just built condition are not needed.
              Thus we call sel->cond->quick_fix_field for safety.
            */
            if (tab->condition() && !tab->condition()->fixed)
              tab->condition()->quick_fix_field();

            key_map usable_keys= tab->keys();
            ORDER::enum_order interesting_order= ORDER::ORDER_NOT_RELEVANT;

            if (recheck_reason == LOW_LIMIT)
            {
              int read_direction= 0;

              /*
                If the current plan is to use range, then check if the
                already selected index provides the order dictated by the
                ORDER BY clause.
              */
              if (tab->quick() && tab->quick()->index != MAX_KEY)
              {
                const uint ref_key= tab->quick()->index;

                read_direction= test_if_order_by_key(join->order,
                                                     tab->table(), ref_key);
                /*
                  If the index provides order there is no need to recheck
                  index usage; we already know from the former call to
                  test_quick_select() that a range scan on the chosen
                  index is cheapest. Note that previous calls to
                  test_quick_select() did not take order direction
                  (ASC/DESC) into account, so in case of DESC ordering
                  we still need to recheck.
                */
                if ((read_direction == 1) ||
                    (read_direction == -1 && tab->quick()->reverse_sorted()))
                {
                  recheck_reason= DONT_RECHECK;
                }
              }
              /*
                We do a cost based search for an ordering index here. Do this
                only if prefer_ordering_index switch is on or an index is
                forced for order by
              */
              if (recheck_reason != DONT_RECHECK &&
                  (tab->table()->force_index_order ||
                   thd->optimizer_switch_flag(
                       OPTIMIZER_SWITCH_PREFER_ORDERING_INDEX)))
              {
                int best_key= -1;
                ha_rows select_limit= join->unit->select_limit_cnt;

                /* Use index specified in FORCE INDEX FOR ORDER BY, if any. */
                if (tab->table()->force_index)
                  usable_keys.intersect(tab->table()->keys_in_use_for_order_by);

                /* Do a cost based search on the indexes that give sort order */
                test_if_cheaper_ordering(tab, join->order, tab->table(),
                                         usable_keys, -1, select_limit,
                                         &best_key, &read_direction,
                                         &select_limit);
                if (best_key < 0)
                  recheck_reason= DONT_RECHECK; // No usable keys
                else
                {
                  // Only usable_key is the best_key chosen
                  usable_keys.clear_all();
                  usable_keys.set_bit(best_key);
                  interesting_order= (read_direction == -1 ? ORDER::ORDER_DESC :
                                      ORDER::ORDER_ASC);
                }
              }
            }

            /*
              recheck_reason may have been reset to DONT_RECHECK above; the
              range optimizer is only run again if a reason remains.
            */
            bool search_if_impossible= recheck_reason != DONT_RECHECK;
            if (search_if_impossible)
            {
              if (tab->quick())
              {
                delete tab->quick();
                tab->set_type(JT_ALL);
              }
              QUICK_SELECT_I *qck;
              // A negative result is treated as "condition is impossible".
              search_if_impossible=
                test_quick_select(thd, usable_keys,
                                  used_tables & ~tab->table_ref->map(),
                                  join->calc_found_rows ?
                                   HA_POS_ERROR :
                                   join->unit->select_limit_cnt,
                                  false,   // don't force quick range
                                  interesting_order, tab,
                                  tab->condition(),
                                  &tab->needed_reg, &qck,
                                  tab->table()->force_index) < 0;
              tab->set_quick(qck);
            }
            // Restore the table condition saved before the ON clause was added.
            tab->set_condition(orig_cond);
            if (search_if_impossible)
            {
              /*
                Before reporting "Impossible WHERE" for the whole query
                we have to check whether it is only "impossible ON" instead
              */
              if (!tab->join_cond())
                DBUG_RETURN(1);  // No ON, so it's really "impossible WHERE"
              Opt_trace_object trace_without_on(trace, "without_ON_clause");
              if (tab->quick())
              {
                delete tab->quick();
                tab->set_type(JT_ALL);
              }
              QUICK_SELECT_I *qck;
              /*
                Second attempt: the condition passed is the restored table
                condition (without the ON clause) and all keys of the table
                are considered.
              */
              const bool impossible_where=
                test_quick_select(thd, tab->keys(),
                                  used_tables & ~tab->table_ref->map(),
                                  join->calc_found_rows ?
                                   HA_POS_ERROR :
                                   join->unit->select_limit_cnt,
                                  false,   //don't force quick range
                                  ORDER::ORDER_NOT_RELEVANT, tab,
                                  tab->condition(), &tab->needed_reg,
                                  &qck, tab->table()->force_index) < 0;
              tab->set_quick(qck);
              if (impossible_where)
                DBUG_RETURN(1);                 // Impossible WHERE
            }

            /*
              Access method changed. This is after deciding join order
              and access method for all other tables so the info
              updated below will not have any effect on the execution
              plan.
            */
            if (tab->quick())
              tab->set_type(calc_join_type(tab->quick()->get_type()));

          } // end of "if (recheck_reason != DONT_RECHECK)"

          if (!tab->table()->quick_keys.is_subset(tab->checked_keys) ||
              !tab->needed_reg.is_subset(tab->checked_keys))
          {
            tab->keys().merge(tab->table()->quick_keys);
            tab->keys().merge(tab->needed_reg);

            /*
              The logic below for assigning tab->use_quick is strange.
              It bases the decision of which access method to use
              (dynamic range, range, scan) based on seemingly
              unrelated information like the presence of another index
              with too bad selectivity to be used.

              Consider the following scenario:

              The join optimizer has decided to use join order
              (t1,t2), and 'tab' is currently t2. Further, assume that
              there is a join condition between t1 and t2 using some
              range operator (e.g. "t1.x < t2.y").

              It has been decided that a table scan is best for t2.
              make_join_select() then reran the range optimizer a few
              lines up because there is an index 't2.good_idx'
              covering the t2.y column. If 'good_idx' is the only
              index in t2, the decision below will be to use dynamic
              range. However, if t2 also has another index 't2.other'
              which the range access method can be used on but
              selectivity is bad (#rows estimate is high), then table
              scan is chosen instead.

              Thus, the choice of DYNAMIC RANGE vs SCAN depends on the
              presence of an index that has so bad selectivity that it
              will not be used anyway.
            */
            if (!tab->needed_reg.is_clear_all() &&
                (tab->table()->quick_keys.is_clear_all() ||
                 (tab->quick() &&
                  (tab->quick()->records >= 100L))))
            {
              tab->use_quick= QS_DYNAMIC_RANGE;
              tab->set_type(JT_ALL);
            }
            else
              tab->use_quick= QS_RANGE;
          }

          /*
            If either the join type or the quick select object differs from
            what was recorded before the recheck, the stored filter effect
            is marked as stale.
          */
          if (tab->type() != orig_join_type ||
              tab->quick() != orig_quick)       // Access method changed
            tab->position()->filter_effect= COND_FILTER_STALE;

        }
      }

      if (join->attach_join_conditions(i))
        DBUG_RETURN(true);
    }
    trace_attached_comp.end();

    /*
      In outer joins the loop above, in iteration for table #i, may push
      conditions to a table before #i. Thus, the processing below has to be in
      a separate loop:
    */
    Opt_trace_array trace_attached_summary(trace,
                                           "attached_conditions_summary");
    for (uint i= join->const_tables ; i < join->tables ; i++)
    {
      JOIN_TAB * const tab= join->best_ref[i];
      if (!tab->table())
        continue;
      // Note: this 'cond' shadows the function parameter of the same name.
      Item * const cond= tab->condition();
      Opt_trace_object trace_one_table(trace);
      trace_one_table.add_utf8_table(tab->table_ref).
        add("attached", cond);
      if (cond &&
          cond->has_subquery() /* traverse only if needed */ )
      {
        /*
          Why we pass walk_subquery=false: imagine
          WHERE t1.col IN (SELECT * FROM t2
                             WHERE t2.col IN (SELECT * FROM t3)
          and tab==t1. The grandchild subquery (SELECT * FROM t3) should not
          be marked as "in condition of t1" but as "in condition of t2", for
          correct calculation of the number of its executions.
        */
        // The walker receives the query block and the index of this table.
        std::pair<SELECT_LEX *, int> pair_object(join->select_lex, i);
        cond->walk(&Item::inform_item_in_cond_of_tab,
                   Item::WALK_POSTFIX,
                   pointer_cast<uchar * const>(&pair_object));
      }

    }
  }
  DBUG_RETURN(0);
}
9663
9664
9665 /**
9666 Remove the following expressions from ORDER BY and GROUP BY:
9667 Constant expressions @n
9668 Expression that only uses tables that are of type EQ_REF and the reference
9669 is in the ORDER list or if all refereed tables are of the above type.
9670
9671 In the following, the X field can be removed:
9672 @code
9673 SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t1.a,t2.X
9674 SELECT * FROM t1,t2,t3 WHERE t1.a=t2.a AND t2.b=t3.b ORDER BY t1.a,t3.X
9675 @endcode
9676
9677 These can't be optimized:
9678 @code
9679 SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t2.X,t1.a
9680 SELECT * FROM t1,t2 WHERE t1.a=t2.a AND t1.b=t2.b ORDER BY t1.a,t2.c
9681 SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t2.b,t1.a
9682 @endcode
9683
9684 @param JOIN join object
9685 @param start_order clause being analyzed (ORDER BY, GROUP BY...)
9686 @param tab table
9687 @param cached_eq_ref_tables bitmap: bit Z is set if the table of map Z
9688 was already the subject of an eq_ref_table() call for the same clause; then
9689 the return value of this previous call can be found at bit Z of
9690 'eq_ref_tables'
9691 @param eq_ref_tables see above.
9692 */
9693
9694 static bool
eq_ref_table(JOIN * join,ORDER * start_order,JOIN_TAB * tab,table_map * cached_eq_ref_tables,table_map * eq_ref_tables)9695 eq_ref_table(JOIN *join, ORDER *start_order, JOIN_TAB *tab,
9696 table_map *cached_eq_ref_tables, table_map *eq_ref_tables)
9697 {
9698 /* We can skip const tables only if not an outer table */
9699 if (tab->type() == JT_CONST && tab->first_inner() == NO_PLAN_IDX)
9700 return true;
9701 if (tab->type() != JT_EQ_REF || tab->table()->is_nullable())
9702 return false;
9703
9704 const table_map map= tab->table_ref->map();
9705 uint found= 0;
9706
9707 for (Item **ref_item= tab->ref().items, **end= ref_item + tab->ref().key_parts ;
9708 ref_item != end ; ref_item++)
9709 {
9710 if (! (*ref_item)->const_item())
9711 { // Not a const ref
9712 ORDER *order;
9713 for (order=start_order ; order ; order=order->next)
9714 {
9715 if ((*ref_item)->eq(order->item[0],0))
9716 break;
9717 }
9718 if (order)
9719 {
9720 if (!(order->used & map))
9721 {
9722 found++;
9723 order->used|= map;
9724 }
9725 continue; // Used in ORDER BY
9726 }
9727 if (!only_eq_ref_tables(join, start_order, (*ref_item)->used_tables(),
9728 cached_eq_ref_tables, eq_ref_tables))
9729 return false;
9730 }
9731 }
9732 /* Check that there was no reference to table before sort order */
9733 for (; found && start_order ; start_order=start_order->next)
9734 {
9735 if (start_order->used & map)
9736 {
9737 found--;
9738 continue;
9739 }
9740 if (start_order->depend_map & map)
9741 return false;
9742 }
9743 return true;
9744 }
9745
9746
9747 /// @see eq_ref_table()
9748 static bool
only_eq_ref_tables(JOIN * join,ORDER * order,table_map tables,table_map * cached_eq_ref_tables,table_map * eq_ref_tables)9749 only_eq_ref_tables(JOIN *join, ORDER *order, table_map tables,
9750 table_map *cached_eq_ref_tables, table_map *eq_ref_tables)
9751 {
9752 tables&= ~PSEUDO_TABLE_BITS;
9753 for (JOIN_TAB **tab=join->map2table ; tables ; tab++, tables>>=1)
9754 {
9755 if (tables & 1)
9756 {
9757 const table_map map= (*tab)->table_ref->map();
9758 bool is_eq_ref;
9759 if (*cached_eq_ref_tables & map) // then there exists a cached bit
9760 is_eq_ref= *eq_ref_tables & map;
9761 else
9762 {
9763 is_eq_ref= eq_ref_table(join, order, *tab,
9764 cached_eq_ref_tables, eq_ref_tables);
9765 if (is_eq_ref)
9766 *eq_ref_tables|= map;
9767 else
9768 *eq_ref_tables&= ~map;
9769 *cached_eq_ref_tables|= map; // now there exists a cached bit
9770 }
9771 if (!is_eq_ref)
9772 return false;
9773 }
9774 }
9775 return true;
9776 }
9777
9778
9779 /**
9780 Check if an expression in ORDER BY or GROUP BY is a duplicate of a
9781 preceding expression.
9782
9783 @param first_order the first expression in the ORDER BY or
9784 GROUP BY clause
9785 @param possible_dup the expression that might be a duplicate of
9786 another expression preceding it the ORDER BY
9787 or GROUP BY clause
9788
9789 @returns true if possible_dup is a duplicate, false otherwise
9790 */
duplicate_order(const ORDER * first_order,const ORDER * possible_dup)9791 static bool duplicate_order(const ORDER *first_order,
9792 const ORDER *possible_dup)
9793 {
9794 const ORDER *order;
9795 for (order=first_order; order ; order=order->next)
9796 {
9797 if (order == possible_dup)
9798 {
9799 // all expressions preceding possible_dup have been checked.
9800 return false;
9801 }
9802 else
9803 {
9804 const Item *it1= order->item[0]->real_item();
9805 const Item *it2= possible_dup->item[0]->real_item();
9806
9807 if (it1->eq(it2, 0))
9808 return true;
9809 }
9810 }
9811 return false;
9812 }
9813
9814 /**
9815 Remove all constants and check if ORDER only contains simple
9816 expressions.
9817
9818 simple_order is set to 1 if sort_order only uses fields from head table
9819 and the head table is not a LEFT JOIN table.
9820
9821 @param first_order List of SORT or GROUP order
9822 @param cond WHERE statement
9823 @param change_list Set to 1 if we should remove things from list.
9824 If this is not set, then only simple_order is
9825 calculated.
9826 @param simple_order Set to 1 if we are only using simple expressions
9827 @param clause_type "ORDER BY" etc for printing in optimizer trace
9828
9829 @return
9830 Returns new sort order
9831 */
9832
remove_const(ORDER * first_order,Item * cond,bool change_list,bool * simple_order,const char * clause_type)9833 ORDER *JOIN::remove_const(ORDER *first_order, Item *cond, bool change_list,
9834 bool *simple_order, const char *clause_type)
9835 {
9836 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
9837
9838 if (plan_is_const())
9839 return change_list ? 0 : first_order; // No need to sort
9840
9841 Opt_trace_context * const trace= &thd->opt_trace;
9842 Opt_trace_disable_I_S trace_disabled(trace, first_order == NULL);
9843 Opt_trace_object trace_wrapper(trace);
9844 Opt_trace_object trace_simpl(trace, "clause_processing");
9845 if (trace->is_started())
9846 {
9847 trace_simpl.add_alnum("clause", clause_type);
9848 String str;
9849 st_select_lex::print_order(&str, first_order,
9850 enum_query_type(QT_TO_SYSTEM_CHARSET |
9851 QT_SHOW_SELECT_NUMBER |
9852 QT_NO_DEFAULT_DB));
9853 trace_simpl.add_utf8("original_clause", str.ptr(), str.length());
9854 }
9855 Opt_trace_array trace_each_item(trace, "items");
9856
9857 ORDER *order,**prev_ptr;
9858 JOIN_TAB *const first_tab= best_ref[const_tables];
9859 table_map first_table= first_tab->table_ref->map();
9860 table_map not_const_tables= ~const_table_map;
9861 table_map ref;
9862 // Caches to avoid repeating eq_ref_table() calls, @see eq_ref_table()
9863 table_map eq_ref_tables= 0, cached_eq_ref_tables= 0;
9864 DBUG_ENTER("JOIN::remove_const");
9865
9866 prev_ptr= &first_order;
9867 *simple_order= !first_tab->join_cond();
9868
9869 /* NOTE: A variable of not_const_tables ^ first_table; breaks gcc 2.7 */
9870
9871 update_depend_map(first_order);
9872 for (order=first_order; order ; order=order->next)
9873 {
9874 Opt_trace_object trace_one_item(trace);
9875 trace_one_item.add("item", order->item[0]);
9876 table_map order_tables=order->item[0]->used_tables();
9877 if (order->item[0]->with_sum_func ||
9878 /*
9879 If the outer table of an outer join is const (either by itself or
9880 after applying WHERE condition), grouping on a field from such a
9881 table will be optimized away and filesort without temporary table
9882 will be used unless we prevent that now. Filesort is not fit to
9883 handle joins and the join condition is not applied. We can't detect
9884 the case without an expensive test, however, so we force temporary
9885 table for all queries containing more than one table, ROLLUP, and an
9886 outer join.
9887 */
9888 (primary_tables > 1 &&
9889 rollup.state == ROLLUP::STATE_INITED &&
9890 select_lex->outer_join))
9891 *simple_order= 0; // Must do a temp table to sort
9892 else if (!(order_tables & not_const_tables))
9893 {
9894 if (order->item[0]->has_subquery())
9895 {
9896 if (!thd->lex->is_explain())
9897 {
9898 Opt_trace_array trace_subselect(trace, "subselect_evaluation");
9899 order->item[0]->val_str(&order->item[0]->str_value);
9900 }
9901 order->item[0]->mark_subqueries_optimized_away();
9902 }
9903 trace_one_item.add("uses_only_constant_tables", true);
9904 continue; // skip const item
9905 }
9906 else if (duplicate_order(first_order, order))
9907 {
9908 /*
9909 If 'order' is a duplicate of an expression earlier in the
9910 ORDER/GROUP BY sequence, it can be removed from the ORDER BY
9911 or GROUP BY clause.
9912 */
9913 trace_one_item.add("duplicate_item", true);
9914 continue;
9915 }
9916 else if (order->in_field_list && order->item[0]->has_subquery())
9917 /*
9918 If the order item is a subquery that is also in the field
9919 list, a temp table should be used to avoid evaluating the
9920 subquery for each row both when a) creating a sort index and
9921 b) getting the value.
9922 Example: "SELECT (SELECT ... ) as a ... GROUP BY a;"
9923 */
9924 *simple_order= false;
9925 else
9926 {
9927 if (order_tables & (RAND_TABLE_BIT | OUTER_REF_TABLE_BIT))
9928 *simple_order=0;
9929 else
9930 {
9931 if (cond && const_expression_in_where(cond,order->item[0]))
9932 {
9933 trace_one_item.add("equals_constant_in_where", true);
9934 continue;
9935 }
9936 if ((ref=order_tables & (not_const_tables ^ first_table)))
9937 {
9938 if (!(order_tables & first_table) &&
9939 only_eq_ref_tables(this, first_order, ref,
9940 &cached_eq_ref_tables, &eq_ref_tables))
9941 {
9942 trace_one_item.add("eq_ref_to_preceding_items", true);
9943 continue;
9944 }
9945 *simple_order=0; // Must do a temp table to sort
9946 }
9947 }
9948 }
9949 if (change_list)
9950 *prev_ptr= order; // use this entry
9951 prev_ptr= &order->next;
9952 }
9953 if (change_list)
9954 *prev_ptr=0;
9955 if (prev_ptr == &first_order) // Nothing to sort/group
9956 *simple_order=1;
9957 DBUG_PRINT("exit",("simple_order: %d",(int) *simple_order));
9958
9959 trace_each_item.end();
9960 trace_simpl.add("resulting_clause_is_simple", *simple_order);
9961 if (trace->is_started() && change_list)
9962 {
9963 String str;
9964 st_select_lex::print_order(&str, first_order,
9965 enum_query_type(QT_TO_SYSTEM_CHARSET |
9966 QT_SHOW_SELECT_NUMBER |
9967 QT_NO_DEFAULT_DB));
9968 trace_simpl.add_utf8("resulting_clause", str.ptr(), str.length());
9969 }
9970
9971 DBUG_RETURN(first_order);
9972 }
9973
9974
9975 /**
9976 Optimize conditions by
9977
9978 a) applying transitivity to build multiple equality predicates
9979 (MEP): if x=y and y=z the MEP x=y=z is built.
9980 b) apply constants where possible. If the value of x is known to be
9981 42, x is replaced with a constant of value 42. By transitivity, this
9982 also applies to MEPs, so the MEP in a) will become 42=x=y=z.
9983 c) remove conditions that are always false or always true
9984
9985 @param thd Thread handler
9986 @param[in,out] cond WHERE or HAVING condition to optimize
9987 @param[out] cond_equal The built multiple equalities
9988 @param join_list list of join operations with join conditions
9989 = NULL: Called for HAVING condition
9990 @param[out] cond_value Not changed if cond was empty
9991 COND_TRUE if cond is always true
9992 COND_FALSE if cond is impossible
9993 COND_OK otherwise
9994
9995 @returns false if success, true if error
9996 */
9997
optimize_cond(THD * thd,Item ** cond,COND_EQUAL ** cond_equal,List<TABLE_LIST> * join_list,Item::cond_result * cond_value)9998 bool optimize_cond(THD *thd, Item **cond, COND_EQUAL **cond_equal,
9999 List<TABLE_LIST> *join_list,
10000 Item::cond_result *cond_value)
10001 {
10002 Opt_trace_context * const trace= &thd->opt_trace;
10003 DBUG_ENTER("optimize_cond");
10004
10005 Opt_trace_object trace_wrapper(trace);
10006 Opt_trace_object trace_cond(trace, "condition_processing");
10007 trace_cond.add_alnum("condition", join_list ? "WHERE" : "HAVING");
10008 trace_cond.add("original_condition", *cond);
10009 Opt_trace_array trace_steps(trace, "steps");
10010
10011 /*
10012 Enter this function
10013 a) For a WHERE condition or a query having outer join.
10014 b) For a HAVING condition.
10015 */
10016 assert(*cond || join_list);
10017
10018 /*
10019 Build all multiple equality predicates and eliminate equality
10020 predicates that can be inferred from these multiple equalities.
10021 For each reference of a field included into a multiple equality
10022 that occurs in a function set a pointer to the multiple equality
10023 predicate. Substitute a constant instead of this field if the
10024 multiple equality contains a constant.
10025 This is performed for the WHERE condition and any join conditions, but
10026 not for the HAVING condition.
10027 */
10028 if (join_list)
10029 {
10030 Opt_trace_object step_wrapper(trace);
10031 step_wrapper.add_alnum("transformation", "equality_propagation");
10032 {
10033 Opt_trace_disable_I_S
10034 disable_trace_wrapper(trace, !(*cond && (*cond)->has_subquery()));
10035 Opt_trace_array
10036 trace_subselect(trace, "subselect_evaluation");
10037 if (build_equal_items(thd, *cond, cond, NULL, true,
10038 join_list, cond_equal))
10039 DBUG_RETURN(true);
10040 }
10041 step_wrapper.add("resulting_condition", *cond);
10042 }
10043 /* change field = field to field = const for each found field = const */
10044 if (*cond)
10045 {
10046 Opt_trace_object step_wrapper(trace);
10047 step_wrapper.add_alnum("transformation", "constant_propagation");
10048 {
10049 Opt_trace_disable_I_S
10050 disable_trace_wrapper(trace, !(*cond)->has_subquery());
10051 Opt_trace_array trace_subselect(trace, "subselect_evaluation");
10052 if (propagate_cond_constants(thd, NULL, *cond, *cond))
10053 DBUG_RETURN(true);
10054 }
10055 step_wrapper.add("resulting_condition", *cond);
10056 }
10057
10058 /*
10059 Remove all instances of item == item
10060 Remove all and-levels where CONST item != CONST item
10061 */
10062 DBUG_EXECUTE("where",print_where(*cond,"after const change", QT_ORDINARY););
10063 if (*cond)
10064 {
10065 Opt_trace_object step_wrapper(trace);
10066 step_wrapper.add_alnum("transformation", "trivial_condition_removal");
10067 {
10068 Opt_trace_disable_I_S
10069 disable_trace_wrapper(trace, !(*cond)->has_subquery());
10070 Opt_trace_array trace_subselect(trace, "subselect_evaluation");
10071 if (remove_eq_conds(thd, *cond, cond, cond_value))
10072 DBUG_RETURN(true);
10073 }
10074 step_wrapper.add("resulting_condition", *cond);
10075 }
10076 assert(!thd->is_error());
10077 if (thd->is_error())
10078 DBUG_RETURN(true);
10079 DBUG_RETURN(false);
10080 }
10081
10082
10083 /**
10084 Handle the recursive job for remove_eq_conds()
10085
10086 @param thd Thread handler
10087 @param cond the condition to handle.
10088 @param[out] retcond Modified condition after removal
10089 @param[out] cond_value the resulting value of the condition
10090
10091 @see remove_eq_conds() for more details on argument
10092
10093 @returns false if success, true if error
10094 */
10095
internal_remove_eq_conds(THD * thd,Item * cond,Item ** retcond,Item::cond_result * cond_value)10096 static bool internal_remove_eq_conds(THD *thd, Item *cond,
10097 Item **retcond,
10098 Item::cond_result *cond_value)
10099 {
10100 if (cond->type() == Item::COND_ITEM)
10101 {
10102 Item_cond *const item_cond= down_cast<Item_cond *>(cond);
10103 const bool and_level= item_cond->functype() == Item_func::COND_AND_FUNC;
10104 List_iterator<Item> li(*item_cond->argument_list());
10105 bool should_fix_fields= false;
10106
10107 *cond_value=Item::COND_UNDEF;
10108 Item *item;
10109 while ((item=li++))
10110 {
10111 Item *new_item;
10112 Item::cond_result tmp_cond_value;
10113 if (internal_remove_eq_conds(thd, item, &new_item, &tmp_cond_value))
10114 return true;
10115
10116 if (new_item == NULL)
10117 li.remove();
10118 else if (item != new_item)
10119 {
10120 (void) li.replace(new_item);
10121 should_fix_fields= true;
10122 }
10123 if (*cond_value == Item::COND_UNDEF)
10124 *cond_value= tmp_cond_value;
10125 switch (tmp_cond_value)
10126 {
10127 case Item::COND_OK: // Not TRUE or FALSE
10128 if (and_level || *cond_value == Item::COND_FALSE)
10129 *cond_value= tmp_cond_value;
10130 break;
10131 case Item::COND_FALSE:
10132 if (and_level) // Always false
10133 {
10134 *cond_value= tmp_cond_value;
10135 *retcond= NULL;
10136 return false;
10137 }
10138 break;
10139 case Item::COND_TRUE:
10140 if (!and_level) // Always true
10141 {
10142 *cond_value= tmp_cond_value;
10143 *retcond= NULL;
10144 return false;
10145 }
10146 break;
10147 case Item::COND_UNDEF: // Impossible
10148 assert(false); /* purecov: deadcode */
10149 }
10150 }
10151 if (should_fix_fields)
10152 item_cond->update_used_tables();
10153
10154 if (item_cond->argument_list()->elements == 0 ||
10155 *cond_value != Item::COND_OK)
10156 {
10157 *retcond= NULL;
10158 return false;
10159 }
10160 if (item_cond->argument_list()->elements == 1)
10161 {
10162 /*
10163 BUG#11765699:
10164 We're dealing with an AND or OR item that has only one
10165 argument. However, it is not an option to empty the list
10166 because:
10167
10168 - this function is called for either JOIN::conds or
10169 JOIN::having, but these point to the same condition as
10170 SELECT_LEX::where and SELECT_LEX::having do.
10171
10172 - The return value of remove_eq_conds() is assigned to
10173 JOIN::conds and JOIN::having, so emptying the list and
10174 returning the only remaining item "replaces" the AND or OR
10175 with item for the variables in JOIN. However, the return
10176 value is not assigned to the SELECT_LEX counterparts. Thus,
10177 if argument_list is emptied, SELECT_LEX forgets the item in
10178 argument_list()->head().
10179
10180 item is therefore returned, but argument_list is not emptied.
10181 */
10182 item= item_cond->argument_list()->head();
10183 /*
10184 Consider reenabling the line below when the optimizer has been
10185 split into properly separated phases.
10186
10187 item_cond->argument_list()->empty();
10188 */
10189 *retcond= item;
10190 return false;
10191 }
10192 }
10193 else if (cond->type() == Item::FUNC_ITEM &&
10194 down_cast<Item_func *>(cond)->functype() == Item_func::ISNULL_FUNC)
10195 {
10196 Item_func_isnull *const func= down_cast<Item_func_isnull *>(cond);
10197 Item **args= func->arguments();
10198 if (args[0]->type() == Item::FIELD_ITEM)
10199 {
10200 Field *const field= down_cast<Item_field *>(args[0])->field;
10201 /* fix to replace 'NULL' dates with '0' (shreeve@uci.edu) */
10202 /*
10203 See BUG#12594011
10204 Documentation says that
10205 SELECT datetime_notnull d FROM t1 WHERE d IS NULL
10206 shall return rows where d=='0000-00-00'
10207
10208 Thus, for DATE and DATETIME columns defined as NOT NULL,
10209 "date_notnull IS NULL" has to be modified to
10210 "date_notnull IS NULL OR date_notnull == 0" (if outer join)
10211 "date_notnull == 0" (otherwise)
10212
10213 */
10214 if (((field->type() == MYSQL_TYPE_DATE) ||
10215 (field->type() == MYSQL_TYPE_DATETIME)) &&
10216 (field->flags & NOT_NULL_FLAG))
10217 {
10218 Item *item0= new(thd->mem_root) Item_int((longlong)0, 1);
10219 if (item0 == NULL)
10220 return true;
10221 Item *eq_cond= new(thd->mem_root) Item_func_eq(args[0], item0);
10222 if (eq_cond == NULL)
10223 return true;
10224
10225 if (args[0]->is_outer_field())
10226 {
10227 // outer join: transform "col IS NULL" to "col IS NULL or col=0"
10228 Item *or_cond= new(thd->mem_root) Item_cond_or(eq_cond, cond);
10229 if (or_cond == NULL)
10230 return true;
10231 cond= or_cond;
10232 }
10233 else
10234 {
10235 // not outer join: transform "col IS NULL" to "col=0"
10236 cond= eq_cond;
10237 }
10238
10239 if (cond->fix_fields(thd, &cond))
10240 return true;
10241 }
10242 }
10243 if (cond->const_item())
10244 {
10245 bool value;
10246 if (eval_const_cond(thd, cond, &value))
10247 return true;
10248 *cond_value= value ? Item::COND_TRUE : Item::COND_FALSE;
10249 *retcond= NULL;
10250 return false;
10251 }
10252 }
10253 else if (cond->const_item() && !cond->is_expensive())
10254 {
10255 bool value;
10256 if (eval_const_cond(thd, cond, &value))
10257 return true;
10258 *cond_value= value ? Item::COND_TRUE : Item::COND_FALSE;
10259 *retcond= NULL;
10260 return false;
10261 }
10262 else
10263 { // boolan compare function
10264 *cond_value= cond->eq_cmp_result();
10265 if (*cond_value == Item::COND_OK)
10266 {
10267 *retcond= cond;
10268 return false;
10269 }
10270 Item *left_item= down_cast<Item_func *>(cond)->arguments()[0];
10271 Item *right_item= down_cast<Item_func *>(cond)->arguments()[1];
10272 if (left_item->eq(right_item,1))
10273 {
10274 if (!left_item->maybe_null ||
10275 down_cast<Item_func *>(cond)->functype() == Item_func::EQUAL_FUNC)
10276 {
10277 *retcond= NULL;
10278 return false; // Compare of identical items
10279 }
10280 }
10281 }
10282 *cond_value= Item::COND_OK;
10283 *retcond= cond; // Point at next and level
10284 return false;
10285 }
10286
10287
10288 /**
10289 Remove const and eq items. Return new item, or NULL if no condition
10290
10291 @param thd thread handler
10292 @param cond the condition to handle
10293 @param[out] retcond condition after const removal
10294 @param[out] cond_value resulting value of the condition
10295 =COND_OK condition must be evaluated (e.g field = constant)
10296 =COND_TRUE always true (e.g 1 = 1)
10297 =COND_FALSE always false (e.g 1 = 2)
10298
10299 @note calls internal_remove_eq_conds() to check the complete tree.
10300
10301 @returns false if success, true if error
10302 */
10303
remove_eq_conds(THD * thd,Item * cond,Item ** retcond,Item::cond_result * cond_value)10304 bool remove_eq_conds(THD *thd, Item *cond, Item **retcond,
10305 Item::cond_result *cond_value)
10306 {
10307 if (cond->type() == Item::FUNC_ITEM &&
10308 down_cast<Item_func *>(cond)->functype() == Item_func::ISNULL_FUNC)
10309 {
10310 /*
10311 Handles this special case for some ODBC applications:
10312 The are requesting the row that was just updated with a auto_increment
10313 value with this construct:
10314
10315 SELECT * from table_name where auto_increment_column IS NULL
10316 This will be changed to:
10317 SELECT * from table_name where auto_increment_column = LAST_INSERT_ID
10318 */
10319
10320 Item_func_isnull *const func= down_cast<Item_func_isnull *>(cond);
10321 Item **args= func->arguments();
10322 if (args[0]->type() == Item::FIELD_ITEM)
10323 {
10324 Field *const field= down_cast<Item_field *>(args[0])->field;
10325 if ((field->flags & AUTO_INCREMENT_FLAG) &&
10326 !field->table->is_nullable() &&
10327 (thd->variables.option_bits & OPTION_AUTO_IS_NULL) &&
10328 (thd->first_successful_insert_id_in_prev_stmt > 0 &&
10329 thd->substitute_null_with_insert_id))
10330 {
10331 query_cache.abort(&thd->query_cache_tls);
10332
10333 cond= new Item_func_eq(
10334 args[0],
10335 new Item_int(NAME_STRING("last_insert_id()"),
10336 thd->read_first_successful_insert_id_in_prev_stmt(),
10337 MY_INT64_NUM_DECIMAL_DIGITS));
10338 if (cond == NULL)
10339 return true;
10340
10341 if (cond->fix_fields(thd, &cond))
10342 return true;
10343
10344 /*
10345 IS NULL should be mapped to LAST_INSERT_ID only for first row, so
10346 clear for next row
10347 */
10348 thd->substitute_null_with_insert_id= FALSE;
10349
10350 *cond_value= Item::COND_OK;
10351 *retcond= cond;
10352 return false;
10353 }
10354 }
10355 }
10356 return internal_remove_eq_conds(thd, cond, retcond, cond_value);
10357 }
10358
10359
10360 /**
10361 Check if GROUP BY/DISTINCT can be optimized away because the set is
10362 already known to be distinct.
10363
10364 Used in removing the GROUP BY/DISTINCT of the following types of
10365 statements:
10366 @code
10367 SELECT [DISTINCT] <unique_key_cols>... FROM <single_table_ref>
10368 [GROUP BY <unique_key_cols>,...]
10369 @endcode
10370
10371 If (a,b,c is distinct)
10372 then <any combination of a,b,c>,{whatever} is also distinct
10373
10374 This function checks if all the key parts of any of the unique keys
10375 of the table are referenced by a list : either the select list
10376 through find_field_in_item_list or GROUP BY list through
10377 find_field_in_order_list.
10378 If the above holds and the key parts cannot contain NULLs then we
10379 can safely remove the GROUP BY/DISTINCT,
10380 as no result set can be more distinct than an unique key.
10381
10382 @param tab The join table to operate on.
10383 @param find_func function to iterate over the list and search
10384 for a field
10385
10386 @retval
10387 1 found
10388 @retval
10389 0 not found.
10390
10391 @note
10392 The function assumes that make_outerjoin_info() has been called in
10393 order for the check for outer tables to work.
10394 */
10395
10396 static bool
list_contains_unique_index(JOIN_TAB * tab,bool (* find_func)(Field *,void *),void * data)10397 list_contains_unique_index(JOIN_TAB *tab,
10398 bool (*find_func) (Field *, void *), void *data)
10399 {
10400 TABLE *table= tab->table();
10401
10402 if (tab->is_inner_table_of_outer_join())
10403 return 0;
10404 for (uint keynr= 0; keynr < table->s->keys; keynr++)
10405 {
10406 if (keynr == table->s->primary_key ||
10407 (table->key_info[keynr].flags & HA_NOSAME))
10408 {
10409 KEY *keyinfo= table->key_info + keynr;
10410 KEY_PART_INFO *key_part, *key_part_end;
10411
10412 for (key_part=keyinfo->key_part,
10413 key_part_end=key_part+ keyinfo->user_defined_key_parts;
10414 key_part < key_part_end;
10415 key_part++)
10416 {
10417 if (key_part->field->real_maybe_null() ||
10418 !find_func(key_part->field, data))
10419 break;
10420 }
10421 if (key_part == key_part_end)
10422 return 1;
10423 }
10424 }
10425 return 0;
10426 }
10427
10428
10429 /**
10430 Helper function for list_contains_unique_index.
10431 Find a field reference in a list of ORDER structures.
10432 Finds a direct reference of the Field in the list.
10433
10434 @param field The field to search for.
10435 @param data ORDER *.The list to search in
10436
10437 @retval
10438 1 found
10439 @retval
10440 0 not found.
10441 */
10442
10443 static bool
find_field_in_order_list(Field * field,void * data)10444 find_field_in_order_list (Field *field, void *data)
10445 {
10446 ORDER *group= (ORDER *) data;
10447 bool part_found= 0;
10448 for (ORDER *tmp_group= group; tmp_group; tmp_group=tmp_group->next)
10449 {
10450 Item *item= (*tmp_group->item)->real_item();
10451 if (item->type() == Item::FIELD_ITEM &&
10452 ((Item_field*) item)->field->eq(field))
10453 {
10454 part_found= 1;
10455 break;
10456 }
10457 }
10458 return part_found;
10459 }
10460
10461
10462 /**
10463 Helper function for list_contains_unique_index.
10464 Find a field reference in a dynamic list of Items.
10465 Finds a direct reference of the Field in the list.
10466
10467 @param[in] field The field to search for.
10468 @param[in] data List<Item> *.The list to search in
10469
10470 @retval
10471 1 found
10472 @retval
10473 0 not found.
10474 */
10475
10476 static bool
find_field_in_item_list(Field * field,void * data)10477 find_field_in_item_list (Field *field, void *data)
10478 {
10479 List<Item> *fields= (List<Item> *) data;
10480 bool part_found= 0;
10481 List_iterator<Item> li(*fields);
10482 Item *item;
10483
10484 while ((item= li++))
10485 {
10486 if (item->type() == Item::FIELD_ITEM &&
10487 ((Item_field*) item)->field->eq(field))
10488 {
10489 part_found= 1;
10490 break;
10491 }
10492 }
10493 return part_found;
10494 }
10495
10496
10497 /**
10498 Create a group by that consist of all non const fields.
10499
10500 Try to use the fields in the order given by 'order' to allow one to
10501 optimize away 'order by'.
10502 */
10503
10504 static ORDER *
create_distinct_group(THD * thd,Ref_ptr_array ref_pointer_array,ORDER * order_list,List<Item> & fields,List<Item> & all_fields,bool * all_order_by_fields_used)10505 create_distinct_group(THD *thd, Ref_ptr_array ref_pointer_array,
10506 ORDER *order_list, List<Item> &fields,
10507 List<Item> &all_fields,
10508 bool *all_order_by_fields_used)
10509 {
10510 List_iterator<Item> li(fields);
10511 Item *item;
10512 ORDER *order,*group,**prev;
10513
10514 *all_order_by_fields_used= 1;
10515 while ((item=li++))
10516 item->marker=0; /* Marker that field is not used */
10517
10518 prev= &group; group=0;
10519 for (order=order_list ; order; order=order->next)
10520 {
10521 if (order->in_field_list)
10522 {
10523 ORDER *ord=(ORDER*) thd->memdup((char*) order,sizeof(ORDER));
10524 if (!ord)
10525 return 0;
10526 *prev=ord;
10527 prev= &ord->next;
10528 (*ord->item)->marker=1;
10529 }
10530 else
10531 *all_order_by_fields_used= 0;
10532 }
10533
10534 li.rewind();
10535 while ((item=li++))
10536 {
10537 if (!item->const_item() && !item->with_sum_func && !item->marker)
10538 {
10539 /*
10540 Don't put duplicate columns from the SELECT list into the
10541 GROUP BY list.
10542 */
10543 ORDER *ord_iter;
10544 for (ord_iter= group; ord_iter; ord_iter= ord_iter->next)
10545 if ((*ord_iter->item)->eq(item, 1))
10546 goto next_item;
10547
10548 ORDER *ord=(ORDER*) thd->mem_calloc(sizeof(ORDER));
10549 if (!ord)
10550 return 0;
10551
10552 if (item->type() == Item::FIELD_ITEM &&
10553 item->field_type() == MYSQL_TYPE_BIT)
10554 {
10555 /*
10556 Because HEAP tables can't index BIT fields we need to use an
10557 additional hidden field for grouping because later it will be
10558 converted to a LONG field. Original field will remain of the
10559 BIT type and will be returned to a client.
10560 @note setup_ref_array() needs to account for the extra space.
10561 */
10562 Item_field *new_item= new Item_field(thd, (Item_field*)item);
10563 ord->item= thd->lex->current_select()->add_hidden_item(new_item);
10564 }
10565 else
10566 {
10567 /*
10568 We have here only field_list (not all_field_list), so we can use
10569 simple indexing of ref_pointer_array (order in the array and in the
10570 list are same)
10571 */
10572 ord->item= &ref_pointer_array[0];
10573 }
10574 ord->direction= ORDER::ORDER_ASC;
10575 *prev=ord;
10576 prev= &ord->next;
10577 }
10578 next_item:
10579 ref_pointer_array.pop_front();
10580 }
10581 *prev=0;
10582 return group;
10583 }
10584
10585
10586 /**
10587 Return table number if there is only one table in sort order
10588 and group and order is compatible, else return 0.
10589 */
10590
10591 static TABLE *
get_sort_by_table(ORDER * a,ORDER * b,TABLE_LIST * tables)10592 get_sort_by_table(ORDER *a,ORDER *b,TABLE_LIST *tables)
10593 {
10594 table_map map= (table_map) 0;
10595 DBUG_ENTER("get_sort_by_table");
10596
10597 if (!a)
10598 a=b; // Only one need to be given
10599 else if (!b)
10600 b=a;
10601
10602 for (; a && b; a=a->next,b=b->next)
10603 {
10604 if (!(*a->item)->eq(*b->item,1))
10605 DBUG_RETURN(0);
10606 map|=a->item[0]->used_tables();
10607 }
10608 map&= ~PARAM_TABLE_BIT;
10609 if (!map || (map & (RAND_TABLE_BIT | OUTER_REF_TABLE_BIT)))
10610 DBUG_RETURN(0);
10611
10612 for (; !(map & tables->map()); tables= tables->next_leaf) ;
10613 if (map != tables->map())
10614 DBUG_RETURN(0); // More than one table
10615 DBUG_PRINT("exit",("sort by table: %d",tables->tableno()));
10616 DBUG_RETURN(tables->table);
10617 }
10618
10619
10620 /**
10621 Create a condition for a const reference for a table.
10622
10623 @param thd THD pointer
10624 @param join_tab pointer to the table
10625
10626 @return A pointer to the created condition for the const reference.
10627 @retval !NULL if the condition was created successfully
10628 @retval NULL if an error has occured
10629 */
10630
create_cond_for_const_ref(THD * thd,JOIN_TAB * join_tab)10631 static Item_cond_and *create_cond_for_const_ref(THD *thd, JOIN_TAB *join_tab)
10632 {
10633 DBUG_ENTER("create_cond_for_const_ref");
10634 assert(join_tab->ref().key_parts);
10635
10636 TABLE *table= join_tab->table();
10637 Item_cond_and *cond= new Item_cond_and();
10638 if (!cond)
10639 DBUG_RETURN(NULL);
10640
10641 for (uint i=0 ; i < join_tab->ref().key_parts ; i++)
10642 {
10643 Field *field= table->field[table->key_info[join_tab->ref().key].key_part[i].
10644 fieldnr-1];
10645 Item *value= join_tab->ref().items[i];
10646 Item *item= new Item_field(field);
10647 if (!item)
10648 DBUG_RETURN(NULL);
10649 item= join_tab->ref().null_rejecting & ((key_part_map)1 << i) ?
10650 (Item *)new Item_func_eq(item, value) :
10651 (Item *)new Item_func_equal(item, value);
10652 if (!item)
10653 DBUG_RETURN(NULL);
10654 if (cond->add(item))
10655 DBUG_RETURN(NULL);
10656 }
10657 cond->fix_fields(thd, (Item**)&cond);
10658
10659 DBUG_RETURN(cond);
10660 }
10661
10662 /**
10663 Create a condition for a const reference and add this to the
10664 currenct select for the table.
10665 */
10666
add_ref_to_table_cond(THD * thd,JOIN_TAB * join_tab)10667 static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab)
10668 {
10669 DBUG_ENTER("add_ref_to_table_cond");
10670 if (!join_tab->ref().key_parts)
10671 DBUG_RETURN(FALSE);
10672
10673 int error= 0;
10674
10675 /* Create a condition representing the const reference. */
10676 Item_cond_and *cond= create_cond_for_const_ref(thd, join_tab);
10677 if (!cond)
10678 DBUG_RETURN(TRUE);
10679
10680 /* Add this condition to the existing select condtion */
10681 if (join_tab->condition())
10682 {
10683 error=(int) cond->add(join_tab->condition());
10684 cond->update_used_tables();
10685 }
10686 join_tab->set_condition(cond);
10687 Opt_trace_object(&thd->opt_trace).add("added_back_ref_condition", cond);
10688
10689 DBUG_RETURN(error ? TRUE : FALSE);
10690 }
10691
10692
10693 /**
10694 Remove additional condition inserted by IN/ALL/ANY transformation.
10695
10696 @param conds condition for processing
10697
10698 @return
10699 new conditions
10700
10701 @note that this function has Bug#13915291.
10702 */
10703
remove_additional_cond(Item * conds)10704 static Item *remove_additional_cond(Item* conds)
10705 {
10706 // Because it uses in_additional_cond it applies only to the scalar case.
10707 if (conds->item_name.ptr() == in_additional_cond)
10708 return 0;
10709 if (conds->type() == Item::COND_ITEM)
10710 {
10711 Item_cond *cnd= (Item_cond*) conds;
10712 List_iterator<Item> li(*(cnd->argument_list()));
10713 Item *item;
10714 while ((item= li++))
10715 {
10716 if (item->item_name.ptr() == in_additional_cond)
10717 {
10718 li.remove();
10719 if (cnd->argument_list()->elements == 1)
10720 return cnd->argument_list()->head();
10721 return conds;
10722 }
10723 }
10724 }
10725 return conds;
10726 }
10727
10728
10729 /**
10730 Update some values in keyuse for faster choose_table_order() loop.
10731
10732 @todo Check if this is the real meaning of ref_table_rows.
10733
10734 @param keyuse_array Array of Key_use elements being updated.
10735
10736
10737 */
10738
optimize_keyuse()10739 void JOIN::optimize_keyuse()
10740 {
10741 for (size_t ix= 0; ix < keyuse_array.size(); ++ix)
10742 {
10743 Key_use *keyuse= &keyuse_array.at(ix);
10744 table_map map;
10745 /*
10746 If we find a ref, assume this table matches a proportional
10747 part of this table.
10748 For example 100 records matching a table with 5000 records
10749 gives 5000/100 = 50 records per key
10750 Constant tables are ignored.
10751 To avoid bad matches, we don't make ref_table_rows less than 100.
10752 */
10753 keyuse->ref_table_rows= ~(ha_rows) 0; // If no ref
10754 if (keyuse->used_tables &
10755 (map= (keyuse->used_tables & ~const_table_map & ~PSEUDO_TABLE_BITS)))
10756 {
10757 uint tableno;
10758 for (tableno= 0; ! (map & 1) ; map>>=1, tableno++)
10759 {}
10760 if (map == 1) // Only one table
10761 {
10762 TABLE *tmp_table= join_tab[tableno].table();
10763
10764 keyuse->ref_table_rows= max<ha_rows>(tmp_table->file->stats.records, 100);
10765 }
10766 }
10767 /*
10768 Outer reference (external field) is constant for single executing
10769 of subquery
10770 */
10771 if (keyuse->used_tables == OUTER_REF_TABLE_BIT)
10772 keyuse->ref_table_rows= 1;
10773 }
10774 }
10775
10776 /**
10777 Function sets FT hints, initializes FT handlers
10778 and checks if FT index can be used as covered.
10779 */
10780
optimize_fts_query()10781 bool JOIN::optimize_fts_query()
10782 {
10783 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
10784
10785 assert(select_lex->has_ft_funcs());
10786
10787 for (uint i= const_tables; i < tables; i++)
10788 {
10789 JOIN_TAB *tab= best_ref[i];
10790 if (tab->type() != JT_FT)
10791 continue;
10792
10793 Item_func_match *ifm;
10794 Item_func_match* ft_func=
10795 static_cast<Item_func_match*>(tab->position()->key->val);
10796 List_iterator<Item_func_match> li(*(select_lex->ftfunc_list));
10797
10798 while ((ifm= li++))
10799 {
10800 if (!(ifm->used_tables() & tab->table_ref->map()) || ifm->master)
10801 continue;
10802
10803 if (ifm != ft_func)
10804 {
10805 if (ifm->can_skip_ranking())
10806 ifm->set_hints(this, FT_NO_RANKING, HA_POS_ERROR, false);
10807 }
10808 }
10809
10810 /*
10811 Check if internal sorting is needed. FT_SORTED flag is set
10812 if no ORDER BY clause or ORDER BY MATCH function is the same
10813 as the function that is used for FT index and FT table is
10814 the first non-constant table in the JOIN.
10815 */
10816 if (i == const_tables &&
10817 !(ft_func->get_hints()->get_flags() & FT_BOOL) &&
10818 (!order || ft_func == test_if_ft_index_order(order)))
10819 ft_func->set_hints(this, FT_SORTED, m_select_limit, false);
10820
10821 /*
10822 Check if ranking is not needed. FT_NO_RANKING flag is set if
10823 MATCH function is used only in WHERE condition and MATCH
10824 function is not part of an expression.
10825 */
10826 if (ft_func->can_skip_ranking())
10827 ft_func->set_hints(this, FT_NO_RANKING,
10828 !order ? m_select_limit : HA_POS_ERROR, false);
10829 }
10830
10831 return init_ftfuncs(thd, select_lex);
10832 }
10833
10834
10835 /**
10836 Check if FTS index only access is possible.
10837
10838 @param tab pointer to JOIN_TAB structure.
10839
10840 @return TRUE if index only access is possible,
10841 FALSE otherwise.
10842 */
10843
fts_index_access(JOIN_TAB * tab)10844 bool JOIN::fts_index_access(JOIN_TAB *tab)
10845 {
10846 assert(tab->type() == JT_FT);
10847 TABLE *table= tab->table();
10848
10849 if ((table->file->ha_table_flags() & HA_CAN_FULLTEXT_EXT) == 0)
10850 return false; // Optimizations requires extended FTS support by table engine
10851
10852 /*
10853 This optimization does not work with filesort nor GROUP BY
10854 */
10855 if (grouped || (order && ordered_index_usage != ordered_index_order_by))
10856 return false;
10857
10858 /*
10859 Check whether the FTS result is covering. If only document id
10860 and rank is needed, there is no need to access table rows.
10861 */
10862 for (uint i= bitmap_get_first_set(table->read_set);
10863 i < table->s->fields;
10864 i= bitmap_get_next_set(table->read_set, i))
10865 {
10866 if (table->field[i] != table->fts_doc_id_field ||
10867 !tab->ft_func()->docid_in_result())
10868 return false;
10869 }
10870
10871 return true;
10872 }
10873
10874
10875 /**
10876 For {semijoin,subquery} materialization: calculates various cost
10877 information, based on a plan in join->best_positions covering the
10878 to-be-materialized query block and only this.
10879
10880 @param join JOIN where plan can be found
10881 @param sj_nest sj materialization nest (NULL if subquery materialization)
10882 @param n_tables number of to-be-materialized tables
10883 @param[out] sjm where computed costs will be stored
10884
10885 @note that this function modifies join->map2table, which has to be filled
10886 correctly later.
10887 */
calculate_materialization_costs(JOIN * join,TABLE_LIST * sj_nest,uint n_tables,Semijoin_mat_optimize * sjm)10888 static void calculate_materialization_costs(JOIN *join,
10889 TABLE_LIST *sj_nest,
10890 uint n_tables,
10891 Semijoin_mat_optimize *sjm)
10892 {
10893 double mat_cost; // Estimated cost of materialization
10894 double mat_rowcount; // Estimated row count before duplicate removal
10895 double distinct_rowcount; // Estimated rowcount after duplicate removal
10896 List<Item> *inner_expr_list;
10897
10898 if (sj_nest)
10899 {
10900 /*
10901 get_partial_join_cost() assumes a regular join, which is correct when
10902 we optimize a sj-materialization nest (always executed as regular
10903 join).
10904 */
10905 get_partial_join_cost(join, n_tables, &mat_cost, &mat_rowcount);
10906 n_tables+= join->const_tables;
10907 inner_expr_list= &sj_nest->nested_join->sj_inner_exprs;
10908 }
10909 else
10910 {
10911 mat_cost= join->best_read;
10912 mat_rowcount= static_cast<double>(join->best_rowcount);
10913 inner_expr_list= &join->select_lex->item_list;
10914 }
10915
10916 /*
10917 Adjust output cardinality estimates. If the subquery has form
10918
10919 ... oe IN (SELECT t1.colX, t2.colY, func(X,Y,Z) )
10920
10921 then the number of distinct output record combinations has an
10922 upper bound of product of number of records matching the tables
10923 that are used by the SELECT clause.
10924 TODO:
10925 We can get a more precise estimate if we
10926 - use rec_per_key cardinality estimates. For simple cases like
10927 "oe IN (SELECT t.key ...)" it is trivial.
10928 - Functional dependencies between the tables in the semi-join
10929 nest (the payoff is probably less here?)
10930 */
10931 {
10932 for (uint i=0 ; i < n_tables ; i++)
10933 {
10934 JOIN_TAB * const tab= join->best_positions[i].table;
10935 join->map2table[tab->table_ref->tableno()]= tab;
10936 }
10937 List_iterator<Item> it(*inner_expr_list);
10938 Item *item;
10939 table_map map= 0;
10940 while ((item= it++))
10941 map|= item->used_tables();
10942 map&= ~PSEUDO_TABLE_BITS;
10943 Table_map_iterator tm_it(map);
10944 int tableno;
10945 double rows= 1.0;
10946 while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
10947 rows*= join->map2table[tableno]->table()->quick_condition_rows;
10948 distinct_rowcount= min(mat_rowcount, rows);
10949 }
10950 /*
10951 Calculate temporary table parameters and usage costs
10952 */
10953 const uint rowlen= get_tmp_table_rec_length(*inner_expr_list);
10954
10955 const Cost_model_server *cost_model= join->cost_model();
10956
10957 Cost_model_server::enum_tmptable_type tmp_table_type;
10958 if (rowlen * distinct_rowcount < join->thd->variables.max_heap_table_size)
10959 tmp_table_type= Cost_model_server::MEMORY_TMPTABLE;
10960 else
10961 tmp_table_type= Cost_model_server::DISK_TMPTABLE;
10962
10963 /*
10964 Let materialization cost include the cost to create the temporary
10965 table and write the rows into it:
10966 */
10967 mat_cost+= cost_model->tmptable_create_cost(tmp_table_type);
10968 mat_cost+= cost_model->tmptable_readwrite_cost(tmp_table_type, mat_rowcount,
10969 0.0);
10970
10971 sjm->materialization_cost.reset();
10972 sjm->materialization_cost.add_io(mat_cost);
10973
10974 sjm->expected_rowcount= distinct_rowcount;
10975
10976 /*
10977 Set the cost to do a full scan of the temptable (will need this to
10978 consider doing sjm-scan):
10979 */
10980 sjm->scan_cost.reset();
10981 if (distinct_rowcount > 0.0)
10982 {
10983 const double scan_cost=
10984 cost_model->tmptable_readwrite_cost(tmp_table_type,
10985 0.0, distinct_rowcount);
10986 sjm->scan_cost.add_io(scan_cost);
10987 }
10988
10989 // The cost to lookup a row in temp. table
10990 const double row_cost= cost_model->tmptable_readwrite_cost(tmp_table_type,
10991 0.0, 1.0);
10992 sjm->lookup_cost.reset();
10993 sjm->lookup_cost.add_io(row_cost);
10994 }
10995
10996
10997 /**
10998 Decides between EXISTS and materialization; performs last steps to set up
10999 the chosen strategy.
11000 @returns 'false' if no error
11001
11002 @note If UNION this is called on each contained JOIN.
11003
11004 */
decide_subquery_strategy()11005 bool JOIN::decide_subquery_strategy()
11006 {
11007 assert(unit->item);
11008
11009 switch (unit->item->substype())
11010 {
11011 case Item_subselect::IN_SUBS:
11012 case Item_subselect::ALL_SUBS:
11013 case Item_subselect::ANY_SUBS:
11014 // All of those are children of Item_in_subselect and may use EXISTS
11015 break;
11016 default:
11017 return false;
11018 }
11019
11020 Item_in_subselect * const in_pred=
11021 static_cast<Item_in_subselect *>(unit->item);
11022
11023 Item_exists_subselect::enum_exec_method chosen_method= in_pred->exec_method;
11024 // Materialization does not allow UNION so this can't happen:
11025 assert(chosen_method != Item_exists_subselect::EXEC_MATERIALIZATION);
11026
11027 if ((chosen_method == Item_exists_subselect::EXEC_EXISTS_OR_MAT) &&
11028 compare_costs_of_subquery_strategies(&chosen_method))
11029 return true;
11030
11031 switch (chosen_method)
11032 {
11033 case Item_exists_subselect::EXEC_EXISTS:
11034 return in_pred->finalize_exists_transform(select_lex);
11035 case Item_exists_subselect::EXEC_MATERIALIZATION:
11036 return in_pred->finalize_materialization_transform(this);
11037 default:
11038 assert(false);
11039 return true;
11040 }
11041 }
11042
11043
11044 /**
11045 Tells what is the cheapest between IN->EXISTS and subquery materialization,
11046 in terms of cost, for the subquery's JOIN.
11047 Input:
11048 - join->{best_positions,best_read,best_rowcount} must contain the
11049 execution plan of EXISTS (where 'join' is the subquery's JOIN)
11050 - join2->{best_positions,best_read,best_rowcount} must be correctly set
11051 (where 'join2' is the parent join, the grandparent join, etc).
11052 Output:
11053 join->{best_positions,best_read,best_rowcount} contain the cheapest
11054 execution plan (where 'join' is the subquery's JOIN).
11055
11056 This plan choice has to happen before calling functions which set up
11057 execution structures, like JOIN::get_best_combination().
11058
11059 @param[out] method chosen method (EXISTS or materialization) will be put
11060 here.
11061 @returns false if success
11062 */
compare_costs_of_subquery_strategies(Item_exists_subselect::enum_exec_method * method)11063 bool JOIN::compare_costs_of_subquery_strategies(
11064 Item_exists_subselect::enum_exec_method *method)
11065 {
11066 *method= Item_exists_subselect::EXEC_EXISTS;
11067
11068 Item_exists_subselect::enum_exec_method allowed_strategies=
11069 select_lex->subquery_strategy(thd);
11070
11071 if (allowed_strategies == Item_exists_subselect::EXEC_EXISTS)
11072 return false;
11073
11074 assert(allowed_strategies == Item_exists_subselect::EXEC_EXISTS_OR_MAT ||
11075 allowed_strategies == Item_exists_subselect::EXEC_MATERIALIZATION);
11076
11077 const JOIN *parent_join= unit->outer_select()->join;
11078 if (!parent_join || !parent_join->child_subquery_can_materialize)
11079 return false;
11080
11081 Item_in_subselect * const in_pred=
11082 static_cast<Item_in_subselect *>(unit->item);
11083
11084 /*
11085 Testing subquery_allows_etc() at each optimization is necessary as each
11086 execution of a prepared statement may use a different type of parameter.
11087 */
11088 if (!subquery_allows_materialization(in_pred, thd, select_lex,
11089 select_lex->outer_select()))
11090 return false;
11091
11092 Opt_trace_context * const trace= &thd->opt_trace;
11093 Opt_trace_object trace_wrapper(trace);
11094 Opt_trace_object
11095 trace_subqmat(trace, "execution_plan_for_potential_materialization");
11096 const double saved_best_read= best_read;
11097 const ha_rows saved_best_rowcount= best_rowcount;
11098 POSITION * const saved_best_pos= best_positions;
11099
11100 if (in_pred->in2exists_added_to_where())
11101 {
11102 Opt_trace_array trace_subqmat_steps(trace, "steps");
11103
11104 // Up to one extra slot per semi-join nest is needed (if materialized)
11105 const uint sj_nests= select_lex->sj_nests.elements;
11106
11107 if (!(best_positions= new (thd->mem_root) POSITION[tables + sj_nests]))
11108 return true;
11109
11110 // Compute plans which do not use outer references
11111
11112 assert(allow_outer_refs);
11113 allow_outer_refs= false;
11114
11115 if (optimize_semijoin_nests_for_materialization(this))
11116 return true;
11117
11118 if (Optimize_table_order(thd, this, NULL).choose_table_order())
11119 return true;
11120 }
11121 else
11122 {
11123 /*
11124 If IN->EXISTS didn't add any condition to WHERE (only to HAVING, which
11125 can happen if subquery has aggregates) then the plan for materialization
11126 will be the same as for EXISTS - don't compute it again.
11127 */
11128 trace_subqmat.add("surely_same_plan_as_EXISTS", true).
11129 add_alnum("cause", "EXISTS_did_not_change_WHERE");
11130 }
11131
11132 Semijoin_mat_optimize sjm;
11133 calculate_materialization_costs(this, NULL, primary_tables, &sjm);
11134
11135 /*
11136 The number of evaluations of the subquery influences costs, we need to
11137 compute it.
11138 */
11139 Opt_trace_object trace_subq_mat_decision(trace, "subq_mat_decision");
11140 Opt_trace_array trace_parents(trace, "parent_fanouts");
11141 const Item_subselect *subs= in_pred;
11142 double subq_executions= 1.0;
11143 for(;;)
11144 {
11145 Opt_trace_object trace_parent(trace);
11146 trace_parent.add_select_number(parent_join->select_lex->select_number);
11147 double parent_fanout;
11148 if (// safety, not sure needed
11149 parent_join->plan_is_const() ||
11150 // if subq is in condition on constant table:
11151 !parent_join->child_subquery_can_materialize)
11152 {
11153 parent_fanout= 1.0;
11154 trace_parent.add("subq_attached_to_const_table", true);
11155 }
11156 else
11157 {
11158 if (subs->in_cond_of_tab != NO_PLAN_IDX)
11159 {
11160 /*
11161 Subquery is attached to a certain 'pos', pos[-1].prefix_rowcount
11162 is the number of times we'll start a loop accessing 'pos'; each such
11163 loop will read pos->rows_fetched rows of 'pos', so subquery will
11164 be evaluated pos[-1].prefix_rowcount * pos->rows_fetched times.
11165 Exceptions:
11166 - if 'pos' is first, use 1.0 instead of pos[-1].prefix_rowcount
11167 - if 'pos' is first of a sj-materialization nest, same.
11168
11169 If in a sj-materialization nest, pos->rows_fetched and
11170 pos[-1].prefix_rowcount are of the "nest materialization" plan
11171 (copied back in fix_semijoin_strategies()), which is
11172 appropriate as it corresponds to evaluations of our subquery.
11173
11174 pos->prefix_rowcount is not suitable because if we have:
11175 select ... from ot1 where ot1.col in
11176 (select it1.col1 from it1 where it1.col2 not in (subq));
11177 and subq does subq-mat, and plan is ot1 - it1+firstmatch(ot1),
11178 then:
11179 - t1.prefix_rowcount==1 (due to firstmatch)
11180 - subq is attached to it1, and is evaluated for each row read from
11181 t1, potentially way more than 1.
11182 */
11183 const uint idx= subs->in_cond_of_tab;
11184 assert((int)idx >= 0 && idx < parent_join->tables);
11185 trace_parent.add("subq_attached_to_table", true);
11186 QEP_TAB *const parent_tab= &parent_join->qep_tab[idx];
11187 trace_parent.add_utf8_table(parent_tab->table_ref);
11188 parent_fanout= parent_tab->position()->rows_fetched;
11189 if ((idx > parent_join->const_tables) &&
11190 !sj_is_materialize_strategy(parent_tab->position()->sj_strategy))
11191 parent_fanout*=
11192 parent_tab[-1].position()->prefix_rowcount;
11193 }
11194 else
11195 {
11196 /*
11197 Subquery is SELECT list, GROUP BY, ORDER BY, HAVING: it is evaluated
11198 at the end of the parent join's execution.
11199 It can be evaluated once per row-before-grouping:
11200 SELECT SUM(t1.col IN (subq)) FROM t1 GROUP BY expr;
11201 or once per row-after-grouping:
11202 SELECT SUM(t1.col) AS s FROM t1 GROUP BY expr HAVING s IN (subq),
11203 SELECT SUM(t1.col) IN (subq) FROM t1 GROUP BY expr
11204 It's hard to tell. We simply assume 'once per
11205 row-before-grouping'.
11206
11207 Another approximation:
11208 SELECT ... HAVING x IN (subq) LIMIT 1
11209 best_rowcount=1 due to LIMIT, though HAVING (and thus the subquery)
11210 may be evaluated many times before HAVING becomes true and the limit
11211 is reached.
11212 */
11213 trace_parent.add("subq_attached_to_join_result", true);
11214 parent_fanout= static_cast<double>(parent_join->best_rowcount);
11215 }
11216 }
11217 subq_executions*= parent_fanout;
11218 trace_parent.add("fanout", parent_fanout);
11219 const bool cacheable= parent_join->select_lex->is_cacheable();
11220 trace_parent.add("cacheable", cacheable);
11221 if (cacheable)
11222 {
11223 // Parent executed only once
11224 break;
11225 }
11226 /*
11227 Parent query is executed once per outer row => go up to find number of
11228 outer rows. Example:
11229 SELECT ... IN(subq-with-in2exists WHERE ... IN (subq-with-mat))
11230 */
11231 if (!(subs= parent_join->unit->item))
11232 {
11233 // derived table, materialized only once
11234 break;
11235 }
11236 parent_join= parent_join->unit->outer_select()->join;
11237 if (!parent_join)
11238 {
11239 /*
11240 May be single-table UPDATE/DELETE, has no join.
11241 @todo we should find how many rows it plans to UPDATE/DELETE, taking
11242 inspiration in Explain_table::explain_rows_and_filtered().
11243 This is not a priority as it applies only to
11244 UPDATE - child(non-mat-subq) - grandchild(may-be-mat-subq).
11245 And it will autosolve the day UPDATE gets a JOIN.
11246 */
11247 break;
11248 }
11249 } // for(;;)
11250 trace_parents.end();
11251
11252 const double cost_exists= subq_executions * saved_best_read;
11253 const double cost_mat_table= sjm.materialization_cost.total_cost();
11254 const double cost_mat= cost_mat_table + subq_executions *
11255 sjm.lookup_cost.total_cost();
11256 const bool mat_chosen=
11257 (allowed_strategies == Item_exists_subselect::EXEC_EXISTS_OR_MAT) ?
11258 (cost_mat < cost_exists) : true;
11259 trace_subq_mat_decision
11260 .add("cost_to_create_and_fill_materialized_table",
11261 cost_mat_table)
11262 .add("cost_of_one_EXISTS", saved_best_read)
11263 .add("number_of_subquery_evaluations", subq_executions)
11264 .add("cost_of_materialization", cost_mat)
11265 .add("cost_of_EXISTS", cost_exists)
11266 .add("chosen", mat_chosen);
11267 if (mat_chosen)
11268 *method= Item_exists_subselect::EXEC_MATERIALIZATION;
11269 else
11270 {
11271 best_read= saved_best_read;
11272 best_rowcount= saved_best_rowcount;
11273 best_positions= saved_best_pos;
11274 /*
11275 Don't restore JOIN::positions or best_ref, they're not used
11276 afterwards. best_positions is (like: by get_sj_strategy()).
11277 */
11278 }
11279 return false;
11280 }
11281
11282
11283 /**
11284 Optimize rollup specification.
11285
11286 Allocate objects needed for rollup processing.
11287
11288 @returns false if success, true if error.
11289 */
11290
optimize_rollup()11291 bool JOIN::optimize_rollup()
11292 {
11293 tmp_table_param.quick_group= 0; // Can't create groups in tmp table
11294 rollup.state= ROLLUP::STATE_INITED;
11295
11296 /*
11297 Create pointers to the different sum function groups
11298 These are updated by rollup_make_fields()
11299 */
11300 tmp_table_param.group_parts= send_group_parts;
11301 /*
11302 substitute_gc() might substitute an expression in the GROUP BY list with
11303 a generated column. In such case the GC is added to the all_fields as a
11304 hidden field. In total, all_fields list could be grown by up to
11305 send_group_parts columns. Reserve space for them here.
11306 */
11307 const uint ref_array_size= all_fields.elements + send_group_parts;
11308
11309 Item_null_result **null_items=
11310 static_cast<Item_null_result**>(thd->alloc(sizeof(Item*)*send_group_parts));
11311
11312 rollup.null_items= Item_null_array(null_items, send_group_parts);
11313 rollup.ref_pointer_arrays=
11314 static_cast<Ref_ptr_array*>
11315 (thd->alloc((sizeof(Ref_ptr_array) +
11316 ref_array_size * sizeof(Item*)) * send_group_parts));
11317 rollup.fields=
11318 static_cast<List<Item>*>(thd->alloc(sizeof(List<Item>) * send_group_parts));
11319
11320 if (!null_items || !rollup.ref_pointer_arrays || !rollup.fields)
11321 return true;
11322
11323 Item **ref_array= (Item**) (rollup.ref_pointer_arrays+send_group_parts);
11324
11325 /*
11326 Prepare space for field list for the different levels
11327 These will be filled up in rollup_make_fields()
11328 */
11329 ORDER *group= group_list;
11330 for (uint i= 0; i < send_group_parts; i++, group= group->next)
11331 {
11332 rollup.null_items[i]=
11333 new (thd->mem_root) Item_null_result((*group->item)->field_type(),
11334 (*group->item)->result_type());
11335 if (rollup.null_items[i] == NULL)
11336 return true; /* purecov: inspected */
11337 List<Item> *rollup_fields= &rollup.fields[i];
11338 rollup_fields->empty();
11339 rollup.ref_pointer_arrays[i]= Ref_ptr_array(ref_array, ref_array_size);
11340 ref_array+= ref_array_size;
11341 }
11342 for (uint i= 0; i < send_group_parts; i++)
11343 {
11344 for (uint j= 0; j < fields_list.elements; j++)
11345 rollup.fields[i].push_back(rollup.null_items[i]);
11346 }
11347 return false;
11348 }
11349
11350
11351 /**
11352 Refine the best_rowcount estimation based on what happens after tables
11353 have been joined: LIMIT and type of result sink.
11354 */
refine_best_rowcount()11355 void JOIN::refine_best_rowcount()
11356 {
11357 // If plan is const, 0 or 1 rows should be returned
11358 assert(!plan_is_const() || best_rowcount <= 1);
11359
11360 if (plan_is_const())
11361 return;
11362
11363 /*
11364 If a derived table, or a member of a UNION which itself forms a derived
11365 table:
11366 setting estimate to 0 or 1 row would mark the derived table as const.
11367 The row count is bumped to the nearest higher value, so that the
11368 query block will not be evaluated during optimization.
11369 */
11370 if (best_rowcount <= 1 &&
11371 select_lex->master_unit()->first_select()->linkage ==
11372 DERIVED_TABLE_TYPE)
11373 best_rowcount= 2;
11374
11375 /*
11376 There will be no more rows than defined in the LIMIT clause. Use it
11377 as an estimate. If LIMIT 1 is specified, the query block will be
11378 considered "const", with actual row count 0 or 1.
11379 */
11380 set_if_smaller(best_rowcount, unit->select_limit_cnt);
11381 }
11382
11383 /**
11384 @} (end of group Query_Optimizer)
11385 */
11386
11387 /**
11388 This function is used to get the key length of Item object on
11389 which one tmp field will be created during create_tmp_table.
11390 This function references KEY_PART_INFO::init_from_field().
11391
11392 @param item A inner item of outer join
11393
11394 @return The length of a item to be as a key of a temp table
11395 */
11396
get_key_length_tmp_table(Item * item)11397 static uint32 get_key_length_tmp_table(Item *item)
11398 {
11399 uint32 len= 0;
11400
11401 item= item->real_item();
11402 if (item->type() == Item::FIELD_ITEM)
11403 len= ((Item_field *)item)->field->key_length();
11404 else
11405 len= item->max_length;
11406
11407 if (item->maybe_null)
11408 len+= HA_KEY_NULL_LENGTH;
11409
11410 // references KEY_PART_INFO::init_from_field()
11411 enum_field_types type= item->field_type();
11412 if (type == MYSQL_TYPE_BLOB ||
11413 type == MYSQL_TYPE_VARCHAR ||
11414 type == MYSQL_TYPE_GEOMETRY)
11415 len+= HA_KEY_BLOB_LENGTH;
11416
11417 return len;
11418 }
11419
11420