1 /* Copyright (c) 2000, 2021, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23 /**
24 @file
25
26 @brief Optimize query expressions: Make optimal table join order, select
27 optimal access methods per table, apply grouping, sorting and
28 limit processing.
29
30 @defgroup Query_Optimizer Query Optimizer
31 @{
32 */
33
34 #include "sql_optimizer.h"
35
36 #include "my_bit.h" // my_count_bits
37 #include "abstract_query_plan.h" // Join_plan
38 #include "debug_sync.h" // DEBUG_SYNC
39 #include "item_sum.h" // Item_sum
40 #include "lock.h" // mysql_unlock_some_tables
41 #include "opt_explain.h" // join_type_str
42 #include "opt_trace.h" // Opt_trace_object
43 #include "sql_base.h" // init_ftfuncs
44 #include "sql_join_buffer.h" // JOIN_CACHE
45 #include "sql_parse.h" // check_stack_overrun
46 #include "sql_planner.h" // calculate_condition_filter
47 #include "sql_resolver.h" // subquery_allows_materialization
48 #include "sql_test.h" // print_where
49 #include "sql_tmp_table.h" // get_max_key_and_part_length
50 #include "opt_hints.h" // hint_table_state
51
52 #include <algorithm>
53 using std::max;
54 using std::min;
55
/*
  Forward declarations of the file-local (static) helper functions used by
  the optimizer code in this file.
*/

// Semi-join materialization planning.
static bool optimize_semijoin_nests_for_materialization(JOIN *join);
static void calculate_materialization_costs(JOIN *join, TABLE_LIST *sj_nest,
                                            uint n_tables,
                                            Semijoin_mat_optimize *sjm);
static bool make_join_select(JOIN *join, Item *item);
// The find_func callback receives each examined field plus the opaque 'data'.
static bool list_contains_unique_index(JOIN_TAB *tab,
                          bool (*find_func) (Field *, void *), void *data);
static bool find_field_in_item_list (Field *field, void *data);
static bool find_field_in_order_list (Field *field, void *data);
static ORDER *create_distinct_group(THD *thd, Ref_ptr_array ref_pointer_array,
                                    ORDER *order, List<Item> &fields,
                                    List<Item> &all_fields,
                                    bool *all_order_by_fields_used);
static TABLE *get_sort_by_table(ORDER *a,ORDER *b,TABLE_LIST *tables);
static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab);
static Item *remove_additional_cond(Item* conds);
static void trace_table_dependencies(Opt_trace_context * trace,
                                     JOIN_TAB *join_tabs,
                                     uint table_count);
static bool
update_ref_and_keys(THD *thd, Key_use_array *keyuse,JOIN_TAB *join_tab,
                    uint tables, Item *cond, COND_EQUAL *cond_equal,
                    table_map normal_tables, SELECT_LEX *select_lex,
                    SARGABLE_PARAM **sargables);
static bool pull_out_semijoin_tables(JOIN *join);
static void add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab);
static ha_rows get_quick_record_count(THD *thd, JOIN_TAB *tab, ha_rows limit);
static Item *
make_cond_for_table_from_pred(Item *root_cond, Item *cond,
                              table_map tables, table_map used_table,
                              bool exclude_expensive_cond);
static bool
only_eq_ref_tables(JOIN *join, ORDER *order, table_map tables,
                   table_map *cached_eq_ref_tables, table_map
                   *eq_ref_tables);
static bool setup_join_buffering(JOIN_TAB *tab, JOIN *join, uint no_jbuf_after);

static bool
test_if_skip_sort_order(JOIN_TAB *tab, ORDER *order, ha_rows select_limit,
                        const bool no_changes, const key_map *map,
                        const char *clause_type);

static Item_func_match *test_if_ft_index_order(ORDER *order);


static uint32 get_key_length_tmp_table(Item *item);
102
/**
  Optimizes one query block into a query execution plan (QEP.)

  This is the entry point to the query optimization phase. This phase
  applies both logical (equivalent) query rewrites, cost-based join
  optimization, and rule-based access path selection. Once an optimal
  plan is found, the member function creates/initializes all
  structures needed for query execution. The main optimization phases
  are outlined below:

  -# Logical transformations:
     - Outer to inner joins transformation.
     - Equality/constant propagation.
     - Partition pruning.
     - COUNT(*), MIN(), MAX() constant substitution in case of
       implicit grouping.
     - ORDER BY optimization.
  -# Perform cost-based optimization of table order and access path
     selection. See JOIN::make_join_plan()
  -# Post-join order optimization:
     - Create optimal table conditions from the where clause and the
       join conditions.
     - Inject outer-join guarding conditions.
     - Adjust data access methods after determining table condition
       (several times.)
     - Optimize ORDER BY/DISTINCT.
  -# Code generation
     - Set data access functions.
     - Try to optimize away sorting/distinct.
     - Setup temporary table usage for grouping and/or sorting.

  The function has four successful exits, each of which publishes a plan
  state through set_plan_state():
   - NO_TABLES:   the query block has no tables to read.
   - ZERO_RESULT: reached through the setup_subq_exit label whenever
                  zero_result_cause has been set (e.g. "Zero limit",
                  "Impossible WHERE", "Select tables optimized away").
   - PLAN_READY:  after replace_index_subquery() returned non-zero; the
                  return value is then (ret < 0).
   - PLAN_READY:  the regular case, after the complete plan has been set up.

  If the 'optimized' flag is already set the function returns 0 immediately
  without doing anything.

  @note Not every failing path assigns JOIN::error before returning: some
        return with 'error' still holding the value set earlier in the
        function (0 or -1), and a few use DBUG_RETURN(true) instead of 1.

  @retval 0 Success.
  @retval 1 Error, error code saved in member JOIN::error.
*/
int
JOIN::optimize()
{
  // Index of the last table after which join buffering is still permitted;
  // UINT_MAX means "no restriction", 0 is assigned below to disable it.
  uint no_jbuf_after= UINT_MAX;

  DBUG_ENTER("JOIN::optimize");
  assert(select_lex->leaf_table_count == 0 ||
         thd->lex->is_query_tables_locked() ||
         select_lex == unit->fake_select_lex);
  assert(tables == 0 &&
         primary_tables == 0 &&
         tables_list == (TABLE_LIST*)1);

  // to prevent double initialization on EXPLAIN
  if (optimized)
    DBUG_RETURN(0);

  Prepare_error_tracker tracker(thd);

  DEBUG_SYNC(thd, "before_join_optimize");

  THD_STAGE_INFO(thd, stage_optimizing);

  if (select_lex->first_execution)
  {
    /**
      @todo
      This query block didn't transform itself in SELECT_LEX::prepare(), so
      belongs to a parent query block. That parent, or its parents, had to
      transform us - it has not; maybe it is itself in prepare() and
      evaluating the present query block as an Item_subselect. Such evaluation
      in prepare() is expected to be a rare case to be eliminated in the
      future ("SET x=(subq)" is one such case; because it locks tables before
      prepare()).
    */
    if (select_lex->apply_local_transforms(thd, false))
      DBUG_RETURN(error= 1);
  }

  // Open the optimizer trace scopes covering the whole optimization.
  Opt_trace_context * const trace= &thd->opt_trace;
  Opt_trace_object trace_wrapper(trace);
  Opt_trace_object trace_optimize(trace, "join_optimization");
  trace_optimize.add_select_number(select_lex->select_number);
  Opt_trace_array trace_steps(trace, "steps");

  count_field_types(select_lex, &tmp_table_param, all_fields, false, false);

  assert(tmp_table_param.sum_func_count == 0 ||
         group_list || implicit_grouping);

  if (select_lex->olap == ROLLUP_TYPE && optimize_rollup())
    DBUG_RETURN(true); /* purecov: inspected */

  if (alloc_func_list())
    DBUG_RETURN(1); /* purecov: inspected */

  if (select_lex->get_optimizable_conditions(thd, &where_cond, &having_cond))
    DBUG_RETURN(1);

  // From here on a repeated call returns early through the 'optimized' check.
  set_optimized();

  tables_list= select_lex->get_table_list();

  /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
  /*
    Run optimize phase for all derived tables/views used in this SELECT,
    including those in semi-joins.
  */
  if (select_lex->materialized_derived_table_count)
  {
    for (TABLE_LIST *tl= select_lex->leaf_tables; tl; tl= tl->next_leaf)
    {
      if (tl->is_view_or_derived() && tl->optimize_derived(thd))
        DBUG_RETURN(1);
    }
  }

  /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */

  // With DISTINCT, ORDER BY or GROUP BY the row limit is lifted here.
  row_limit= ((select_distinct || order || group_list) ?
              HA_POS_ERROR : unit->select_limit_cnt);
  // m_select_limit is used to decide if we are likely to scan the whole table.
  m_select_limit= unit->select_limit_cnt;

  if (unit->first_select()->active_options() & OPTION_FOUND_ROWS)
  {
    /*
      Calculate found rows if
      - LIMIT is set, and
      - Query block is not equipped with "braces". In this case, each
        query block must be calculated fully and the limit is applied on
        the final UNION evaluation.
    */
    calc_found_rows= m_select_limit != HA_POS_ERROR && !select_lex->braces;
  }
  if (having_cond || calc_found_rows)
    m_select_limit= HA_POS_ERROR;

  if (unit->select_limit_cnt == 0 && !calc_found_rows)
  {
    zero_result_cause= "Zero limit";
    best_rowcount= 0;
    goto setup_subq_exit;
  }

  // Logical optimization of the WHERE condition (and of outer-join nests).
  if (where_cond || select_lex->outer_join)
  {
    if (optimize_cond(thd, &where_cond, &cond_equal,
                      &select_lex->top_join_list, &select_lex->cond_value))
    {
      error= 1;
      DBUG_PRINT("error",("Error from optimize_cond"));
      DBUG_RETURN(1);
    }
    if (select_lex->cond_value == Item::COND_FALSE)
    {
      zero_result_cause= "Impossible WHERE";
      best_rowcount= 0;
      goto setup_subq_exit;
    }
  }
  // Same for the HAVING condition; no join list is passed in this case.
  if (having_cond)
  {
    if (optimize_cond(thd, &having_cond, &cond_equal, NULL,
                      &select_lex->having_value))
    {
      error= 1;
      DBUG_PRINT("error",("Error from optimize_cond"));
      DBUG_RETURN(1);
    }
    if (select_lex->having_value == Item::COND_FALSE)
    {
      zero_result_cause= "Impossible HAVING";
      best_rowcount= 0;
      goto setup_subq_exit;
    }
  }

  if (thd->lex->sql_command == SQLCOM_INSERT_SELECT ||
      thd->lex->sql_command == SQLCOM_REPLACE_SELECT)
  {
    /*
      Statement-based replication of INSERT ... SELECT ... LIMIT and
      REPLACE ... SELECT is safe as order of row is defined with either
      ORDER BY or other condition. However it is too late for it to have
      an impact on our decision to switch to row-based. We can only
      suppress warning here.
    */
    if (select_lex->select_limit &&
        select_lex->select_limit->fixed &&
        select_lex->select_limit->val_int() &&
        !is_order_deterministic(&select_lex->top_join_list, where_cond, order))
    {
      thd->order_deterministic= false;
    }
  }

  if (select_lex->partitioned_table_count && prune_table_partitions())
  {
    error= 1;
    DBUG_PRINT("error", ("Error from prune_partitions"));
    DBUG_RETURN(1);
  }

  /*
    Try to optimize count(*), min() and max() to const fields if
    there is implicit grouping (aggregate functions but no
    group_list). In this case, the result set shall only contain one
    row.
  */
  if (tables_list && implicit_grouping)
  {
    int res;
    /*
      opt_sum_query() returns HA_ERR_KEY_NOT_FOUND if no rows match
      the WHERE condition,
      or 1 if all items were resolved (optimized away),
      or 0, or an error number HA_ERR_...

      If all items were resolved by opt_sum_query, there is no need to
      open any tables.
    */
    if ((res= opt_sum_query(thd, select_lex->leaf_tables, all_fields,
                            where_cond)))
    {
      best_rowcount= 0;
      if (res == HA_ERR_KEY_NOT_FOUND)
      {
        DBUG_PRINT("info",("No matching min/max row"));
        zero_result_cause= "No matching min/max row";
        goto setup_subq_exit;
      }
      // Any other value above 1 is treated as an error number.
      if (res > 1)
      {
        error= res;
        DBUG_PRINT("error",("Error from opt_sum_query"));
        DBUG_RETURN(1);
      }
      // Negative values are handled like HA_ERR_KEY_NOT_FOUND above.
      if (res < 0)
      {
        DBUG_PRINT("info",("No matching min/max row"));
        zero_result_cause= "No matching min/max row";
        goto setup_subq_exit;
      }
      // Only res == 1 remains: every aggregate was resolved to a constant.
      DBUG_PRINT("info",("Select tables optimized away"));
      zero_result_cause= "Select tables optimized away";
      tables_list= 0; // All tables resolved
      best_rowcount= 1;
      const_tables= tables= primary_tables= select_lex->leaf_table_count;
      /*
        Extract all table-independent conditions and replace the WHERE
        clause with them. All other conditions were computed by opt_sum_query
        and the MIN/MAX/COUNT function(s) have been replaced by constants,
        so there is no need to compute the whole WHERE clause again.
        Notice that make_cond_for_table() will always succeed to remove all
        computed conditions, because opt_sum_query() is applicable only to
        conjunctions.
        Preserve conditions for EXPLAIN.
      */
      if (where_cond && !thd->lex->describe)
      {
        Item *table_independent_conds=
          make_cond_for_table(where_cond, PSEUDO_TABLE_BITS, 0, 0);
        DBUG_EXECUTE("where",
                     print_where(table_independent_conds,
                                 "where after opt_sum_query()",
                                 QT_ORDINARY););
        where_cond= table_independent_conds;
      }
      goto setup_subq_exit;
    }
  }
  // Query block without tables: set up tmp tables and publish NO_TABLES.
  if (!tables_list)
  {
    DBUG_PRINT("info",("No tables"));
    best_rowcount= 1;
    error= 0;
    if (make_tmp_tables_info())
      DBUG_RETURN(1);
    count_field_types(select_lex, &tmp_table_param, all_fields, false, false);
    // Make plan visible for EXPLAIN
    set_plan_state(NO_TABLES);
    DBUG_RETURN(0);
  }
  error= -1; // Error is sent to client
  sort_by_table= get_sort_by_table(order, group_list, select_lex->leaf_tables);

  // Try to replace expressions with matching indexed generated columns.
  if ((where_cond || group_list || order) &&
      substitute_gc(thd, select_lex, where_cond, group_list, order))
  {
    // We added hidden fields to the all_fields list, count them.
    count_field_types(select_lex, &tmp_table_param, select_lex->all_fields,
                      false, false);
  }

  // Set up join order and initial access paths
  THD_STAGE_INFO(thd, stage_statistics);
  if (make_join_plan())
  {
    if (thd->killed)
      thd->send_kill_message();
    DBUG_PRINT("error",("Error: JOIN::make_join_plan() failed"));
    DBUG_RETURN(1);
  }

  // At this stage, join_tab==NULL, JOIN_TABs are listed in order by best_ref.
  ASSERT_BEST_REF_IN_JOIN_ORDER(this);

  // make_join_plan() may itself have found that the result is empty.
  if (zero_result_cause)
    goto setup_subq_exit;

  if (rollup.state != ROLLUP::STATE_NONE)
  {
    if (rollup_process_const_fields())
    {
      DBUG_PRINT("error", ("Error: rollup_process_fields() failed"));
      DBUG_RETURN(1);
    }
    /*
      Fields may have been replaced by Item_func_rollup_const, so
      recalculate the number of fields and functions for this query block.
    */

    // JOIN::optimize_rollup() may set quick_group=0, and we must not undo that.
    const uint save_quick_group= tmp_table_param.quick_group;

    count_field_types(select_lex, &tmp_table_param, all_fields, false, false);
    tmp_table_param.quick_group= save_quick_group;
  }
  else
  {
    /* Remove distinct if only const tables */
    select_distinct&= !plan_is_const();
  }

  // Hand the const tables to mysql_unlock_some_tables() unless the session
  // is in locked-tables mode or SELECT_NO_UNLOCK is set.
  if (const_tables && !thd->locked_tables_mode &&
      !(select_lex->active_options() & SELECT_NO_UNLOCK))
  {
    TABLE *ct[MAX_TABLES];
    for (uint i= 0; i < const_tables; i++)
      ct[i]= best_ref[i]->table();
    mysql_unlock_some_tables(thd, ct, const_tables);
  }
  if (!where_cond && select_lex->outer_join)
  {
    /* Handle the case where we have an OUTER JOIN without a WHERE */
    where_cond=new Item_int((longlong) 1,1); // Always true
  }

  error= 0;
  /*
    Among the equal fields belonging to the same multiple equality
    choose the one that is to be retrieved first and substitute
    all references to these in where condition for a reference for
    the selected field.
  */
  if (where_cond)
  {
    where_cond= substitute_for_best_equal_field(where_cond, cond_equal,
                                                map2table);
    if (thd->is_error())
    {
      error= 1;
      DBUG_PRINT("error",("Error from substitute_for_best_equal"));
      DBUG_RETURN(1);
    }
    where_cond->update_used_tables();
    DBUG_EXECUTE("where",
                 print_where(where_cond,
                             "after substitute_best_equal",
                             QT_ORDINARY););
  }

  /*
    Perform the same optimization on field evaluation for all join conditions.
  */
  for (uint i= const_tables; i < tables ; ++i)
  {
    JOIN_TAB *const tab= best_ref[i];
    if (tab->position() && tab->join_cond())
    {
      tab->set_join_cond(substitute_for_best_equal_field(tab->join_cond(),
                                                         tab->cond_equal,
                                                         map2table));
      if (thd->is_error())
      {
        error= 1;
        DBUG_PRINT("error",("Error from substitute_for_best_equal"));
        DBUG_RETURN(1);
      }
      tab->join_cond()->update_used_tables();
    }
  }

  if (init_ref_access())
  {
    error= 1;
    DBUG_PRINT("error",("Error from init_ref_access"));
    DBUG_RETURN(1);
  }

  // Update table dependencies after assigning ref access fields
  update_depend_map();

  THD_STAGE_INFO(thd, stage_preparing);

  // A true return without a THD error is reported as an impossible WHERE.
  if (make_join_select(this, where_cond))
  {
    if (thd->is_error())
      DBUG_RETURN(1);

    zero_result_cause=
      "Impossible WHERE noticed after reading const tables";
    goto setup_subq_exit;
  }

  if (select_lex->query_result()->initialize_tables(this))
  {
    DBUG_PRINT("error",("Error: initialize_tables() failed"));
    DBUG_RETURN(1); // error == -1
  }

  error= -1; /* if goto err */

  if (optimize_distinct_group_order())
    DBUG_RETURN(true);

  // Join buffering is disabled when SELECT_NO_JOIN_CACHE is set or when the
  // query block has full-text functions.
  if ((select_lex->active_options() & SELECT_NO_JOIN_CACHE) ||
      select_lex->ftfunc_list->elements)
    no_jbuf_after= 0;

  /* Perform FULLTEXT search before all regular searches */
  if (select_lex->has_ft_funcs() && optimize_fts_query())
    DBUG_RETURN(1);

  /*
    By setting child_subquery_can_materialize so late we gain the following:
    JOIN::compare_costs_of_subquery_strategies() can test this variable to
    know if we have finished evaluating constant conditions, which itself
    helps determining fanouts.
  */
  child_subquery_can_materialize= true;

  /*
    It's necessary to check const part of HAVING cond as
    there is a chance that some cond parts may become
    const items after make_join_statistics() (for example
    when Item is a reference to const table field from
    outer join).
    This check is performed only for those conditions
    which do not use aggregate functions. In such case
    temporary table may not be used and const condition
    elements may be lost during further having
    condition transformation in JOIN::exec.
  */
  if (having_cond && const_table_map && !having_cond->with_sum_func)
  {
    having_cond->update_used_tables();
    if (remove_eq_conds(thd, having_cond, &having_cond,
                        &select_lex->having_value))
    {
      error= 1;
      DBUG_PRINT("error",("Error from remove_eq_conds"));
      DBUG_RETURN(1);
    }
    if (select_lex->having_value == Item::COND_FALSE)
    {
      having_cond= new Item_int((longlong) 0,1);
      zero_result_cause= "Impossible HAVING noticed after reading const tables";
      goto setup_subq_exit;
    }
  }

  /* Cache constant expressions in WHERE, HAVING, ON clauses. */
  if (!plan_is_const() && cache_const_exprs())
    DBUG_RETURN(1);

  // See if this subquery can be evaluated with subselect_indexsubquery_engine
  if (const int ret= replace_index_subquery())
  {
    set_plan_state(PLAN_READY);
    /*
      We leave optimize() because the rest of it is only about order/group
      which those subqueries don't have and about setting up plan which
      we're not going to use due to different execution method.
    */
    DBUG_RETURN(ret < 0);
  }

  // The braces keep 'simple_sort' out of the scope crossed by the gotos above.
  {
    /*
      If the hint FORCE INDEX FOR ORDER BY/GROUP BY is used for the first
      table (it does not make sense for other tables) then we cannot do join
      buffering.
    */
    if (!plan_is_const())
    {
      const TABLE * const first= best_ref[const_tables]->table();
      if ((first->force_index_order && order) ||
          (first->force_index_group && group_list))
        no_jbuf_after= 0;
    }

    bool simple_sort= true;
    // Check whether join cache could be used
    for (uint i= const_tables; i < tables; i++)
    {
      JOIN_TAB *const tab= best_ref[i];
      if (!tab->position())
        continue;
      if (setup_join_buffering(tab, this, no_jbuf_after))
        DBUG_RETURN(true);
      if (tab->use_join_cache() != JOIN_CACHE::ALG_NONE)
        simple_sort= false;
      assert(tab->type() != JT_FT ||
             tab->use_join_cache() == JOIN_CACHE::ALG_NONE);
    }
    if (!simple_sort)
    {
      /*
        A join buffer is used for this table. We here inform the optimizer
        that it should not rely on rows of the first non-const table being in
        order thanks to an index scan; indeed join buffering of the present
        table subsequently changes the order of rows.
      */
      simple_order= simple_group= false;
    }
  }

  if (!plan_is_const() && order)
  {
    /*
      Force using of tmp table if sorting by a SP or UDF function due to
      their expensive and probably non-deterministic nature.
    */
    for (ORDER *tmp_order= order; tmp_order ; tmp_order=tmp_order->next)
    {
      Item *item= *tmp_order->item;
      if (item->is_expensive())
      {
        /* Force tmp table without sort */
        simple_order= simple_group= false;
        break;
      }
    }
  }

  /*
    Check if we need to create a temporary table.
    This has to be done if all tables are not already read (const tables)
    and one of the following conditions holds:
    - We are using DISTINCT (simple distinct's have already been optimized away)
    - We are using an ORDER BY or GROUP BY on fields not in the first table
    - We are using different ORDER BY and GROUP BY orders
    - The user wants us to buffer the result.
    When the WITH ROLLUP modifier is present, we cannot skip temporary table
    creation for the DISTINCT clause just because there are only const tables.
  */
  need_tmp= ((!plan_is_const() &&
              ((select_distinct || (order && !simple_order) ||
                (group_list && !simple_group)) ||
               (group_list && order) ||
               (select_lex->active_options() & OPTION_BUFFER_RESULT))) ||
             (rollup.state != ROLLUP::STATE_NONE && select_distinct));

  DBUG_EXECUTE("info", TEST_join(this););

  if (!plan_is_const())
  {
    // First non-const table in the chosen join order.
    JOIN_TAB *tab= best_ref[const_tables];
    /*
      Because filesort always does a full table scan or a quick range scan
      we must add the removed reference to the select for the table.
      We only need to do this when we have a simple_order or simple_group
      as in other cases the join is done before the sort.
    */
    if ((order || group_list) &&
        tab->type() != JT_ALL &&
        tab->type() != JT_FT &&
        tab->type() != JT_REF_OR_NULL &&
        ((order && simple_order) || (group_list && simple_group)))
    {
      if (add_ref_to_table_cond(thd,tab)) {
        DBUG_RETURN(1);
      }
    }
    // Test if we can use an index instead of sorting
    test_skip_sort();
  }

  // Code generation: create the QEP_TABs and set up the execution structures.
  if (alloc_qep(tables))
    DBUG_RETURN(error= 1); /* purecov: inspected */

  if (make_join_readinfo(this, no_jbuf_after))
    DBUG_RETURN(1); /* purecov: inspected */

  if (make_tmp_tables_info())
    DBUG_RETURN(1);

  // At this stage, we have fully set QEP_TABs; JOIN_TABs are inaccessible,
  // pushed joins (see below) are still allowed to change the QEP_TABs

  /*
    Push joins to handlerton(s)

    The handlerton(s) will inspect the QEP through the
    AQP (Abstract Query Plan) and extract from it whatever
    it might implement of pushed execution.

    It is the responsibility of the handler:
     - to store any information it needs for later
       execution of pushed queries.
     - to call appropriate AQP functions which modify the
       QEP to use the special 'linked' read functions
       for those parts of the join which have been pushed.

    Currently pushed joins are only implemented by NDB.

    It only makes sense to try pushing if > 1 non-const tables.
  */
  if (!plan_is_single_table() && !plan_is_const())
  {
    const AQP::Join_plan plan(this);
    if (ha_make_pushed_joins(thd, &plan))
      DBUG_RETURN(1);
  }

  // Update m_current_query_cost to reflect actual need of filesort.
  if (sort_cost > 0.0 && !explain_flags.any(ESP_USING_FILESORT))
  {
    best_read-= sort_cost;
    sort_cost= 0.0;
    if (thd->lex->is_single_level_stmt())
      thd->m_current_query_cost= best_read;
  }

  count_field_types(select_lex, &tmp_table_param, all_fields, false, false);
  // Make plan visible for EXPLAIN
  set_plan_state(PLAN_READY);

  DEBUG_SYNC(thd, "after_join_optimize");

  error= 0;
  DBUG_RETURN(0);

  // Common exit for every case where zero_result_cause has been set above.
setup_subq_exit:

  assert(zero_result_cause != NULL);
  /*
    Even with zero matching rows, subqueries in the HAVING clause may
    need to be evaluated if there are aggregate functions in the
    query. If this JOIN is part of an outer query, subqueries in HAVING may
    be evaluated several times in total; so subquery materialization makes
    sense.
  */
  child_subquery_can_materialize= true;
  trace_steps.end(); // because all steps are done
  Opt_trace_object(trace, "empty_result")
    .add_alnum("cause", zero_result_cause);

  having_for_explain= having_cond;
  error= 0;

  if (!qep_tab && best_ref)
  {
    /*
      After creation of JOIN_TABs in make_join_plan(), we have shortcut due to
      some zero_result_cause. For simplification, if we have JOIN_TABs we
      want QEP_TABs too.
    */
    if (alloc_qep(tables))
      DBUG_RETURN(1); /* purecov: inspected */
    unplug_join_tabs();
  }

  set_plan_state(ZERO_RESULT);
  DBUG_RETURN(0);
}
775
776
777 /**
778 Substitute all expressions in the WHERE condition and ORDER/GROUP lists
779 that match generated columns (GC) expressions with GC fields, if any.
780
781 @details This function does 3 things:
782 1) Creates list of all GC fields that are a part of a key and the GC
783 expression is a function. All query tables are scanned. If there's no
784 such fields, function exits.
785 2) By means of Item::compile() WHERE clause is transformed.
786 @see Item_func::gc_subst_transformer() for details.
787 3) If there's ORDER/GROUP BY clauses, this function tries to substitute
788 expressions in these lists with GC too. It removes from the list of
789 indexed GC all elements which index blocked by hints. This is done to
790 reduce amount of further work. Next it goes through ORDER/GROUP BY list
791 and matches the expression in it against GC expressions in indexed GC
792 list. When a match is found, the expression is replaced with a new
793 Item_field for the matched GC field. Also, this new field is added to
794 the hidden part of all_fields list.
795
796 @param thd thread handle
797 @param select_lex the current select
798 @param where_cond the WHERE condition, possibly NULL
799 @param group_list the GROUP BY clause, possibly NULL
800 @param order the ORDER BY clause, possibly NULL
801
802 @return true if the GROUP BY clause or the ORDER BY clause was
803 changed, false otherwise
804 */
805
substitute_gc(THD * thd,SELECT_LEX * select_lex,Item * where_cond,ORDER * group_list,ORDER * order)806 bool substitute_gc(THD *thd, SELECT_LEX *select_lex, Item *where_cond,
807 ORDER *group_list, ORDER *order)
808 {
809 List<Field> indexed_gc;
810 Opt_trace_context * const trace= &thd->opt_trace;
811 Opt_trace_object trace_wrapper(trace);
812 Opt_trace_object subst_gc(trace, "substitute_generated_columns");
813
814 // Collect all GCs that are a part of a key
815 for (TABLE_LIST *tl= select_lex->leaf_tables;
816 tl;
817 tl= tl->next_leaf)
818 {
819 if (tl->table->s->keys == 0)
820 continue;
821 for (uint i= 0; i < tl->table->s->fields; i++)
822 {
823 Field *fld= tl->table->field[i];
824 if (fld->is_gcol() && !fld->part_of_key.is_clear_all() &&
825 fld->gcol_info->expr_item->can_be_substituted_for_gc())
826 {
827 // Don't check allowed keys here as conditions/group/order use
828 // different keymaps for that.
829 indexed_gc.push_back(fld);
830 }
831 }
832 }
833 // No GC in the tables used in the query
834 if (indexed_gc.elements == 0)
835 return false;
836
837 if (where_cond)
838 {
839 // Item_func::compile will dereference this pointer, provide valid value.
840 uchar i, *dummy= &i;
841 where_cond->compile(&Item::gc_subst_analyzer, &dummy,
842 &Item::gc_subst_transformer, (uchar*) &indexed_gc);
843 subst_gc.add("resulting_condition", where_cond);
844 }
845
846 if (!(group_list || order))
847 return false;
848 // Filter out GCs that do not have index usable for GROUP/ORDER
849 Field *gc;
850 List_iterator<Field> li(indexed_gc);
851
852 while ((gc= li++))
853 {
854 key_map tkm= gc->part_of_key;
855 tkm.intersect(group_list ? gc->table->keys_in_use_for_group_by :
856 gc->table->keys_in_use_for_order_by);
857 if (tkm.is_clear_all())
858 li.remove();
859 }
860 if (!indexed_gc.elements)
861 return false;
862
863 // Index could be used for ORDER only if there is no GROUP
864 ORDER *list= group_list ? group_list : order;
865 bool changed= false;
866 for (ORDER *ord= list; ord; ord= ord->next)
867 {
868 li.rewind();
869 if (!(*ord->item)->can_be_substituted_for_gc())
870 continue;
871 while ((gc= li++))
872 {
873 Item_func *tmp= pointer_cast<Item_func*>(*ord->item);
874 Item_field *field;
875 if ((field= get_gc_for_expr(&tmp, gc, gc->result_type())))
876 {
877
878 changed= true;
879 /* Add new field to field list. */
880 ord->item= select_lex->add_hidden_item(field);
881 break;
882 }
883 }
884 }
885 if (changed && trace->is_started())
886 {
887 String str;
888 st_select_lex::print_order(&str, list,
889 enum_query_type(QT_TO_SYSTEM_CHARSET |
890 QT_SHOW_SELECT_NUMBER |
891 QT_NO_DEFAULT_DB));
892 subst_gc.add_utf8(group_list ? "resulting_GROUP_BY" :
893 "resulting_ORDER_BY",
894 str.ptr(), str.length());
895 }
896 return changed;
897 }
898
899
900 /**
901 Sets the plan's state of the JOIN. This is always the final step of
902 optimization; starting from this call, we expose the plan to other
903 connections (via EXPLAIN CONNECTION) so the plan has to be final.
904 QEP_TAB's quick_optim, condition_optim and keyread_optim are set here.
905 */
set_plan_state(enum_plan_state plan_state_arg)906 void JOIN::set_plan_state(enum_plan_state plan_state_arg)
907 {
908 // A plan should not change to another plan:
909 assert(plan_state_arg == NO_PLAN || plan_state == NO_PLAN);
910 if (plan_state == NO_PLAN && plan_state_arg != NO_PLAN)
911 {
912 if (qep_tab != NULL)
913 {
914 /*
915 We want to cover primary tables, tmp tables (they may have a sort, so
916 their "quick" and "condition" may change when execution runs the
917 sort), and sj-mat inner tables. Note that make_tmp_tables_info() may
918 have added a sort to the first non-const primary table, so it's
919 important to do those assignments after make_tmp_tables_info().
920 */
921 for (uint i= const_tables; i < tables; ++i)
922 {
923 qep_tab[i].set_quick_optim();
924 qep_tab[i].set_condition_optim();
925 qep_tab[i].set_keyread_optim();
926 }
927 }
928 }
929
930 DEBUG_SYNC(thd, "before_set_plan");
931
932 // If SQLCOM_END, no thread is explaining our statement anymore.
933 const bool need_lock= thd->query_plan.get_command() != SQLCOM_END;
934
935 if (need_lock)
936 thd->lock_query_plan();
937 plan_state= plan_state_arg;
938 if (need_lock)
939 thd->unlock_query_plan();
940 }
941
942
alloc_qep(uint n)943 bool JOIN::alloc_qep(uint n)
944 {
945 // Just to be sure that type plan_idx is wide enough:
946 compile_time_assert(MAX_TABLES <= INT_MAX8);
947
948 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
949
950 qep_tab= new(thd->mem_root) QEP_TAB[n];
951 if (!qep_tab)
952 return true; /* purecov: inspected */
953 for (uint i= 0; i < n; ++i)
954 qep_tab[i].init(best_ref[i]);
955 return false;
956 }
957
958
/**
  Initialize this QEP_TAB from a JOIN_TAB.

  @param jt  the JOIN_TAB to take the table reference and shared state from
*/
void QEP_TAB::init(JOIN_TAB *jt)
{
  // NOTE(review): presumably makes this object share jt's plan state, so
  // that table() below yields jt's table - confirm against share_qs().
  jt->share_qs(this);
  set_table(table()); // to update table()->reginfo.qep_tab
  table_ref= jt->table_ref;
}
965
966
967 /// @returns semijoin strategy for this table.
get_sj_strategy() const968 uint QEP_TAB::get_sj_strategy() const
969 {
970 if (first_sj_inner() == NO_PLAN_IDX)
971 return SJ_OPT_NONE;
972 const uint s= join()->qep_tab[first_sj_inner()].position()->sj_strategy;
973 assert(s != SJ_OPT_NONE);
974 return s;
975 }
976
977 /**
978 Return the index used for a table in a QEP
979
980 The various access methods have different places where the index/key
981 number is stored, so this function is needed to return the correct value.
982
983 @returns index number, or MAX_KEY if not applicable.
984
985 JT_SYSTEM and JT_ALL does not use an index, and will always return MAX_KEY.
986
987 JT_INDEX_MERGE supports more than one index. Hence MAX_KEY is returned and
988 a further inspection is needed.
989 */
effective_index() const990 uint QEP_TAB::effective_index() const
991 {
992 switch (type())
993 {
994 case JT_SYSTEM:
995 assert(ref().key == -1);
996 return MAX_KEY;
997
998 case JT_CONST:
999 case JT_EQ_REF:
1000 case JT_REF_OR_NULL:
1001 case JT_REF:
1002 assert(ref().key != -1);
1003 return uint(ref().key);
1004
1005 case JT_INDEX_SCAN:
1006 case JT_FT:
1007 return index();
1008
1009 case JT_INDEX_MERGE:
1010 assert(quick()->index == MAX_KEY);
1011 return MAX_KEY;
1012
1013 case JT_RANGE:
1014 return quick()->index;
1015
1016 case JT_ALL:
1017 default:
1018 // @todo Check why JT_UNKNOWN is a valid value here.
1019 assert(type() == JT_ALL || type() == JT_UNKNOWN);
1020 return MAX_KEY;
1021 }
1022 }
1023
get_sj_strategy() const1024 uint JOIN_TAB::get_sj_strategy() const
1025 {
1026 if (first_sj_inner() == NO_PLAN_IDX)
1027 return SJ_OPT_NONE;
1028 ASSERT_BEST_REF_IN_JOIN_ORDER(join());
1029 JOIN_TAB *tab= join()->best_ref[first_sj_inner()];
1030 uint s= tab->position()->sj_strategy;
1031 assert(s != SJ_OPT_NONE);
1032 return s;
1033 }
1034
1035
replace_index_subquery()1036 int JOIN::replace_index_subquery()
1037 {
1038 DBUG_ENTER("replace_index_subquery");
1039 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
1040
1041 if (group_list ||
1042 !(unit->item && unit->item->substype() == Item_subselect::IN_SUBS) ||
1043 primary_tables != 1 || !where_cond ||
1044 unit->is_union())
1045 DBUG_RETURN(0);
1046
1047 // Guaranteed by remove_redundant_subquery_clauses():
1048 assert(order == NULL && !select_distinct);
1049
1050 subselect_engine *engine= NULL;
1051 Item_in_subselect * const in_subs=
1052 static_cast<Item_in_subselect *>(unit->item);
1053 enum join_type type= JT_UNKNOWN;
1054
1055 JOIN_TAB *const first_join_tab= best_ref[0];
1056
1057 if (in_subs->exec_method == Item_exists_subselect::EXEC_MATERIALIZATION)
1058 {
1059 // We cannot have two engines at the same time
1060 }
1061 else if (having_cond == NULL)
1062 {
1063 if (first_join_tab->type() == JT_EQ_REF &&
1064 first_join_tab->ref().items[0]->item_name.ptr() == in_left_expr_name)
1065 {
1066 type= JT_UNIQUE_SUBQUERY;
1067 /*
1068 This uses test_if_ref(), which needs access to JOIN_TAB::join_cond() so
1069 it must be done before we get rid of JOIN_TAB.
1070 */
1071 remove_subq_pushed_predicates();
1072 }
1073 else if (first_join_tab->type() == JT_REF &&
1074 first_join_tab->ref().items[0]->item_name.ptr() == in_left_expr_name)
1075 {
1076 type= JT_INDEX_SUBQUERY;
1077 remove_subq_pushed_predicates();
1078 }
1079 }
1080 else if (first_join_tab->type() == JT_REF_OR_NULL &&
1081 first_join_tab->ref().items[0]->item_name.ptr() == in_left_expr_name &&
1082 having_cond->item_name.ptr() == in_having_cond)
1083 {
1084 type= JT_INDEX_SUBQUERY;
1085 where_cond= remove_additional_cond(where_cond);
1086 }
1087
1088 if (type == JT_UNKNOWN)
1089 DBUG_RETURN(0);
1090
1091 if (alloc_qep(tables))
1092 DBUG_RETURN(-1); /* purecov: inspected */
1093 unplug_join_tabs();
1094
1095 error= 0;
1096 QEP_TAB *const first_qep_tab= &qep_tab[0];
1097
1098 if (first_qep_tab->table()->covering_keys.is_set(first_qep_tab->ref().key))
1099 {
1100 assert(!first_qep_tab->table()->no_keyread);
1101 first_qep_tab->table()->set_keyread(true);
1102 }
1103 // execution uses where_cond:
1104 first_qep_tab->set_condition(where_cond);
1105
1106 engine=
1107 new subselect_indexsubquery_engine(thd, first_qep_tab, unit->item,
1108 where_cond,
1109 having_cond,
1110 // check_null
1111 first_qep_tab->type() == JT_REF_OR_NULL,
1112 // unique
1113 type == JT_UNIQUE_SUBQUERY);
1114 /**
1115 @todo If having_cond!=NULL we pass unique=false. But for this query:
1116 (oe1, oe2) IN (SELECT primary_key, non_key_maybe_null_field FROM tbl)
1117 we could use "unique=true" for the first index component and let
1118 Item_is_not_null_test(non_key_maybe_null_field) handle the second.
1119 */
1120
1121 first_qep_tab->set_type(type);
1122
1123 if (!unit->item->change_engine(engine))
1124 DBUG_RETURN(1);
1125 else // error:
1126 DBUG_RETURN(-1); /* purecov: inspected */
1127 }
1128
1129
optimize_distinct_group_order()1130 bool JOIN::optimize_distinct_group_order()
1131 {
1132 DBUG_ENTER("optimize_distinct_group_order");
1133 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
1134
1135 /* Optimize distinct away if possible */
1136 {
1137 ORDER *org_order= order;
1138 order= ORDER_with_src(remove_const(order, where_cond, 1, &simple_order,
1139 "ORDER BY"),
1140 order.src);
1141 if (thd->is_error())
1142 {
1143 error= 1;
1144 DBUG_PRINT("error",("Error from remove_const"));
1145 DBUG_RETURN(true);
1146 }
1147
1148 /*
1149 If we are using ORDER BY NULL or ORDER BY const_expression,
1150 return result in any order (even if we are using a GROUP BY)
1151 */
1152 if (!order && org_order)
1153 skip_sort_order= 1;
1154 }
1155 /*
1156 Check if we can optimize away GROUP BY/DISTINCT.
1157 We can do that if there are no aggregate functions, the
1158 fields in DISTINCT clause (if present) and/or columns in GROUP BY
1159 (if present) contain direct references to all key parts of
1160 an unique index (in whatever order) and if the key parts of the
1161 unique index cannot contain NULLs.
1162 Note that the unique keys for DISTINCT and GROUP BY should not
1163 be the same (as long as they are unique).
1164
1165 The FROM clause must contain a single non-constant table.
1166
1167 @todo Apart from the LIS test, every condition depends only on facts
1168 which can be known in SELECT_LEX::prepare(), possibly this block should
1169 move there.
1170 */
1171
1172 JOIN_TAB *const tab= best_ref[const_tables];
1173
1174 if (plan_is_single_table() &&
1175 (group_list || select_distinct) &&
1176 !tmp_table_param.sum_func_count &&
1177 (!tab->quick() ||
1178 tab->quick()->get_type() !=
1179 QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX))
1180 {
1181 if (group_list && rollup.state == ROLLUP::STATE_NONE &&
1182 list_contains_unique_index(tab,
1183 find_field_in_order_list,
1184 (void *) group_list))
1185 {
1186 /*
1187 We have found that grouping can be removed since groups correspond to
1188 only one row anyway, but we still have to guarantee correct result
1189 order. The line below effectively rewrites the query from GROUP BY
1190 <fields> to ORDER BY <fields>. There are three exceptions:
1191 - if skip_sort_order is set (see above), then we can simply skip
1192 GROUP BY;
1193 - if IN(subquery), likewise (see remove_redundant_subquery_clauses())
1194 - we can only rewrite ORDER BY if the ORDER BY fields are 'compatible'
1195 with the GROUP BY ones, i.e. either one is a prefix of another.
1196 We only check if the ORDER BY is a prefix of GROUP BY. In this case
1197 test_if_subpart() copies the ASC/DESC attributes from the original
1198 ORDER BY fields.
1199 If GROUP BY is a prefix of ORDER BY, then it is safe to leave
1200 'order' as is.
1201 */
1202 if (!order || test_if_subpart(group_list, order))
1203 order= (skip_sort_order ||
1204 (unit->item && unit->item->substype() ==
1205 Item_subselect::IN_SUBS)) ? NULL : group_list;
1206
1207 /*
1208 If we have an IGNORE INDEX FOR GROUP BY(fields) clause, this must be
1209 rewritten to IGNORE INDEX FOR ORDER BY(fields).
1210 */
1211 best_ref[0]->table()->keys_in_use_for_order_by=
1212 best_ref[0]->table()->keys_in_use_for_group_by;
1213 group_list= 0;
1214 grouped= false;
1215 }
1216 if (select_distinct &&
1217 list_contains_unique_index(tab,
1218 find_field_in_item_list,
1219 (void *) &fields_list))
1220 {
1221 select_distinct= 0;
1222 }
1223 }
1224 if (!(group_list || tmp_table_param.sum_func_count) &&
1225 select_distinct &&
1226 plan_is_single_table() &&
1227 rollup.state == ROLLUP::STATE_NONE)
1228 {
1229 /*
1230 We are only using one table. In this case we change DISTINCT to a
1231 GROUP BY query if:
1232 - The GROUP BY can be done through indexes (no sort) and the ORDER
1233 BY only uses selected fields.
1234 (In this case we can later optimize away GROUP BY and ORDER BY)
1235 - We are scanning the whole table without LIMIT
1236 This can happen if:
1237 - We are using CALC_FOUND_ROWS
1238 - We are using an ORDER BY that can't be optimized away.
1239
1240 We don't want to use this optimization when we are using LIMIT
1241 because in this case we can just create a temporary table that
1242 holds LIMIT rows and stop when this table is full.
1243 */
1244 if (order)
1245 {
1246 skip_sort_order=
1247 test_if_skip_sort_order(tab, order, m_select_limit,
1248 true, // no_changes
1249 &tab->table()->keys_in_use_for_order_by,
1250 "ORDER BY");
1251 count_field_types(select_lex, &tmp_table_param, all_fields, false, false);
1252 }
1253 ORDER *o;
1254 bool all_order_fields_used;
1255 if ((o= create_distinct_group(thd, ref_ptrs,
1256 order, fields_list, all_fields,
1257 &all_order_fields_used)))
1258 {
1259 group_list= ORDER_with_src(o, ESC_DISTINCT);
1260 const bool skip_group=
1261 skip_sort_order &&
1262 test_if_skip_sort_order(tab, group_list, m_select_limit,
1263 true, // no_changes
1264 &tab->table()->keys_in_use_for_group_by,
1265 "GROUP BY");
1266 count_field_types(select_lex, &tmp_table_param, all_fields, false, false);
1267 if ((skip_group && all_order_fields_used) ||
1268 m_select_limit == HA_POS_ERROR ||
1269 (order && !skip_sort_order))
1270 {
1271 /* Change DISTINCT to GROUP BY */
1272 select_distinct= 0;
1273 no_order= !order;
1274 if (all_order_fields_used)
1275 {
1276 if (order && skip_sort_order)
1277 {
1278 /*
1279 Force MySQL to read the table in sorted order to get result in
1280 ORDER BY order.
1281 */
1282 tmp_table_param.quick_group=0;
1283 }
1284 order=0;
1285 }
1286 grouped= true; // For end_write_group
1287 }
1288 else
1289 group_list= 0;
1290 }
1291 else if (thd->is_fatal_error) // End of memory
1292 DBUG_RETURN(true);
1293 }
1294 simple_group= 0;
1295 {
1296 ORDER *old_group_list= group_list;
1297 group_list= ORDER_with_src(remove_const(group_list, where_cond,
1298 rollup.state == ROLLUP::STATE_NONE,
1299 &simple_group, "GROUP BY"),
1300 group_list.src);
1301
1302 if (thd->is_error())
1303 {
1304 error= 1;
1305 DBUG_PRINT("error",("Error from remove_const"));
1306 DBUG_RETURN(true);
1307 }
1308 if (old_group_list && !group_list)
1309 select_distinct= 0;
1310 }
1311 if (!group_list && grouped)
1312 {
1313 order=0; // The output has only one row
1314 simple_order=1;
1315 select_distinct= 0; // No need in distinct for 1 row
1316 group_optimized_away= 1;
1317 }
1318
1319 calc_group_buffer(this, group_list);
1320 send_group_parts= tmp_table_param.group_parts; /* Save org parts */
1321
1322 if (test_if_subpart(group_list, order) ||
1323 (!group_list && tmp_table_param.sum_func_count))
1324 {
1325 order=0;
1326 if (is_indexed_agg_distinct(this, NULL))
1327 sort_and_group= 0;
1328 }
1329
1330 DBUG_RETURN(false);
1331 }
1332
1333
test_skip_sort()1334 void JOIN::test_skip_sort()
1335 {
1336 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
1337 JOIN_TAB *const tab= best_ref[const_tables];
1338
1339 assert(ordered_index_usage == ordered_index_void);
1340
1341 if (group_list) // GROUP BY honoured first
1342 // (DISTINCT was rewritten to GROUP BY if skippable)
1343 {
1344 /*
1345 When there is SQL_BIG_RESULT do not sort using index for GROUP BY,
1346 and thus force sorting on disk unless a group min-max optimization
1347 is going to be used as it is applied now only for one table queries
1348 with covering indexes.
1349 */
1350 if (!(select_lex->active_options() & SELECT_BIG_RESULT || with_json_agg) ||
1351 (tab->quick() &&
1352 tab->quick()->get_type() ==
1353 QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX))
1354 {
1355 if (simple_group && // GROUP BY is possibly skippable
1356 !select_distinct) // .. if not preceded by a DISTINCT
1357 {
1358 /*
1359 Calculate a possible 'limit' of table rows for 'GROUP BY':
1360 A specified 'LIMIT' is relative to the final resultset.
1361 'need_tmp' implies that there will be more postprocessing
1362 so the specified 'limit' should not be enforced yet.
1363 */
1364 const ha_rows limit = need_tmp ? HA_POS_ERROR : m_select_limit;
1365
1366 if (test_if_skip_sort_order(tab, group_list, limit, false,
1367 &tab->table()->keys_in_use_for_group_by,
1368 "GROUP BY"))
1369 {
1370 ordered_index_usage= ordered_index_group_by;
1371 }
1372 }
1373
1374 /*
1375 If we are going to use semi-join LooseScan, it will depend
1376 on the selected index scan to be used. If index is not used
1377 for the GROUP BY, we risk that sorting is put on the LooseScan
1378 table. In order to avoid this, force use of temporary table.
1379 TODO: Explain the quick_group part of the test below.
1380 */
1381 if ((ordered_index_usage != ordered_index_group_by) &&
1382 (tmp_table_param.quick_group ||
1383 (tab->emb_sj_nest &&
1384 tab->position()->sj_strategy == SJ_OPT_LOOSE_SCAN)))
1385 {
1386 need_tmp= true;
1387 simple_order= simple_group= false; // Force tmp table without sort
1388 }
1389 }
1390 }
1391 else if (order && // ORDER BY wo/ preceding GROUP BY
1392 (simple_order || skip_sort_order)) // which is possibly skippable
1393 {
1394 if (test_if_skip_sort_order(tab, order, m_select_limit, false,
1395 &tab->table()->keys_in_use_for_order_by,
1396 "ORDER BY"))
1397 {
1398 ordered_index_usage= ordered_index_order_by;
1399 }
1400 }
1401 }
1402
1403
1404 /**
1405 Test if ORDER BY is a single MATCH function(ORDER BY MATCH)
1406 and sort order is descending.
1407
1408 @param order pointer to ORDER struct.
1409
1410 @retval
1411 Pointer to MATCH function if order is 'ORDER BY MATCH() DESC'
1412 @retval
1413 NULL otherwise
1414 */
1415
test_if_ft_index_order(ORDER * order)1416 static Item_func_match *test_if_ft_index_order(ORDER *order)
1417 {
1418 if (order && order->next == NULL &&
1419 order->direction == ORDER::ORDER_DESC &&
1420 (*order->item)->type() == Item::FUNC_ITEM &&
1421 ((Item_func*) (*order->item))->functype() == Item_func::FT_FUNC)
1422 return static_cast<Item_func_match*> (*order->item)->get_master();
1423
1424 return NULL;
1425 }
1426
1427 /**
1428 Test if this is a prefix index.
1429
1430 @param table table
1431 @param idx index to check
1432
1433 @return TRUE if this is a prefix index
1434 */
is_prefix_index(TABLE * table,uint idx)1435 bool is_prefix_index(TABLE* table, uint idx)
1436 {
1437 if (!table || !table->key_info)
1438 {
1439 return false;
1440 }
1441 KEY* key_info = table->key_info;
1442 uint key_parts = key_info[idx].user_defined_key_parts;
1443 KEY_PART_INFO* key_part = key_info[idx].key_part;
1444
1445 for (uint i = 0; i < key_parts; i++, key_part++)
1446 {
1447 if (key_part->field &&
1448 (key_part->length !=
1449 table->field[key_part->fieldnr - 1]->key_length() &&
1450 !(key_info->flags & (HA_FULLTEXT | HA_SPATIAL))))
1451 {
1452 return true;
1453 }
1454 }
1455 return false;
1456 }
1457
1458 /**
1459 Test if one can use the key to resolve ordering.
1460
1461 @param order Sort order
1462 @param table Table to sort
1463 @param idx Index to check
1464 @param[out] used_key_parts NULL by default, otherwise return value for
1465 used key parts.
1466
1467 @note
1468 used_key_parts is set to correct key parts used if return value != 0
1469 (On other cases, used_key_part may be changed)
1470 Note that the value may actually be greater than the number of index
1471 key parts. This can happen for storage engines that have the primary
1472 key parts as a suffix for every secondary key.
1473
1474 @retval
1475 1 key is ok.
1476 @retval
1477 0 Key can't be used
1478 @retval
1479 -1 Reverse key can be used
1480 */
1481
test_if_order_by_key(ORDER * order,TABLE * table,uint idx,uint * used_key_parts)1482 int test_if_order_by_key(ORDER *order, TABLE *table, uint idx,
1483 uint *used_key_parts)
1484 {
1485 KEY_PART_INFO *key_part,*key_part_end;
1486 key_part=table->key_info[idx].key_part;
1487 key_part_end=key_part+table->key_info[idx].user_defined_key_parts;
1488 key_part_map const_key_parts=table->const_key_parts[idx];
1489 int reverse=0;
1490 uint key_parts;
1491 my_bool on_pk_suffix= FALSE;
1492 DBUG_ENTER("test_if_order_by_key");
1493
1494 for (; order ; order=order->next, const_key_parts>>=1)
1495 {
1496
1497 /*
1498 Since only fields can be indexed, ORDER BY <something> that is
1499 not a field cannot be resolved by using an index.
1500 */
1501 Item *real_itm= (*order->item)->real_item();
1502 if (real_itm->type() != Item::FIELD_ITEM)
1503 DBUG_RETURN(0);
1504
1505 Field *field= static_cast<Item_field*>(real_itm)->field;
1506 int flag;
1507
1508 /*
1509 Skip key parts that are constants in the WHERE clause.
1510 These are already skipped in the ORDER BY by const_expression_in_where()
1511 */
1512 for (; const_key_parts & 1 && key_part < key_part_end ;
1513 const_key_parts>>= 1)
1514 key_part++;
1515
1516 /* Avoid usage of prefix index for sorting a partition table */
1517 if (table->part_info && key_part != table->key_info[idx].key_part &&
1518 key_part != key_part_end && is_prefix_index(table, idx))
1519 DBUG_RETURN(0);
1520
1521 if (key_part == key_part_end)
1522 {
1523 /*
1524 We are at the end of the key. Check if the engine has the primary
1525 key as a suffix to the secondary keys. If it has continue to check
1526 the primary key as a suffix.
1527 */
1528 if (!on_pk_suffix &&
1529 (table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX) &&
1530 table->s->primary_key != MAX_KEY &&
1531 table->s->primary_key != idx)
1532 {
1533 on_pk_suffix= TRUE;
1534 key_part= table->key_info[table->s->primary_key].key_part;
1535 key_part_end=key_part +
1536 table->key_info[table->s->primary_key].user_defined_key_parts;
1537 const_key_parts=table->const_key_parts[table->s->primary_key];
1538
1539 for (; const_key_parts & 1 ; const_key_parts>>= 1)
1540 key_part++;
1541 /*
1542 The primary and secondary key parts were all const (i.e. there's
1543 one row). The sorting doesn't matter.
1544 */
1545 if (key_part == key_part_end && reverse == 0)
1546 {
1547 key_parts= 0;
1548 reverse= 1;
1549 goto ok;
1550 }
1551 }
1552 else
1553 DBUG_RETURN(0);
1554 }
1555
1556 if (key_part->field != field || !field->part_of_sortkey.is_set(idx))
1557 DBUG_RETURN(0);
1558
1559 const ORDER::enum_order keypart_order=
1560 (key_part->key_part_flag & HA_REVERSE_SORT) ?
1561 ORDER::ORDER_DESC : ORDER::ORDER_ASC;
1562 /* set flag to 1 if we can use read-next on key, else to -1 */
1563 flag= (order->direction == keypart_order) ? 1 : -1;
1564 if (reverse && flag != reverse)
1565 DBUG_RETURN(0);
1566 reverse=flag; // Remember if reverse
1567 key_part++;
1568 }
1569 if (on_pk_suffix)
1570 {
1571 uint used_key_parts_secondary= table->key_info[idx].user_defined_key_parts;
1572 uint used_key_parts_pk=
1573 (uint) (key_part - table->key_info[table->s->primary_key].key_part);
1574 key_parts= used_key_parts_pk + used_key_parts_secondary;
1575
1576 if (reverse == -1 &&
1577 (!(table->file->index_flags(idx, used_key_parts_secondary - 1, 1) &
1578 HA_READ_PREV) ||
1579 !(table->file->index_flags(table->s->primary_key,
1580 used_key_parts_pk - 1, 1) & HA_READ_PREV)))
1581 reverse= 0; // Index can't be used
1582 }
1583 else
1584 {
1585 key_parts= (uint) (key_part - table->key_info[idx].key_part);
1586 if (reverse == -1 &&
1587 !(table->file->index_flags(idx, key_parts-1, 1) & HA_READ_PREV))
1588 reverse= 0; // Index can't be used
1589 }
1590 ok:
1591 if (used_key_parts != NULL)
1592 *used_key_parts= key_parts;
1593 DBUG_RETURN(reverse);
1594 }
1595
1596
1597 /**
1598 Find shortest key suitable for full table scan.
1599
1600 @param table Table to scan
1601 @param usable_keys Allowed keys
1602
1603 @note
1604 As far as
1605 1) clustered primary key entry data set is a set of all record
1606 fields (key fields and not key fields) and
1607 2) secondary index entry data is a union of its key fields and
1608 primary key fields (at least InnoDB and its derivatives don't
1609 duplicate primary key fields there, even if the primary and
1610 the secondary keys have a common subset of key fields),
1611 then secondary index entry data is always a subset of primary key entry.
1612 Unfortunately, key_info[nr].key_length doesn't show the length
1613 of key/pointer pair but a sum of key field lengths only, thus
1614 we can't estimate index IO volume comparing only this key_length
1615 value of secondary keys and clustered PK.
1616 So, try secondary keys first, and choose PK only if there are no
1617 usable secondary covering keys or found best secondary key include
1618 all table fields (i.e. same as PK):
1619
1620 @return
1621 MAX_KEY no suitable key found
1622 key index otherwise
1623 */
1624
find_shortest_key(TABLE * table,const key_map * usable_keys)1625 uint find_shortest_key(TABLE *table, const key_map *usable_keys)
1626 {
1627 uint best= MAX_KEY;
1628 uint usable_clustered_pk= (table->file->primary_key_is_clustered() &&
1629 table->s->primary_key != MAX_KEY &&
1630 usable_keys->is_set(table->s->primary_key)) ?
1631 table->s->primary_key : MAX_KEY;
1632 if (!usable_keys->is_clear_all())
1633 {
1634 uint min_length= (uint) ~0;
1635 for (uint nr=0; nr < table->s->keys ; nr++)
1636 {
1637 if (nr == usable_clustered_pk)
1638 continue;
1639 if (usable_keys->is_set(nr))
1640 {
1641 /*
1642 Can not do full index scan on rtree index because it is not
1643 supported by Innodb, probably not supported by others either.
1644 */
1645 const KEY &key_ref= table->key_info[nr];
1646 if (key_ref.key_length < min_length &&
1647 !(key_ref.flags & HA_SPATIAL))
1648 {
1649 min_length=key_ref.key_length;
1650 best=nr;
1651 }
1652 }
1653 }
1654 }
1655 if (usable_clustered_pk != MAX_KEY)
1656 {
1657 /*
1658 If the primary key is clustered and found shorter key covers all table
1659 fields and is not clustering then primary key scan normally would be
1660 faster because amount of data to scan is the same but PK is clustered.
1661 It's safe to compare key parts with table fields since duplicate key
1662 parts aren't allowed.
1663 */
1664 if (best == MAX_KEY ||
1665 ((table->key_info[best].user_defined_key_parts >= table->s->fields)
1666 && !(table->file->index_flags(best, 0, 0) & HA_CLUSTERED_INDEX)))
1667 best= usable_clustered_pk;
1668 }
1669 return best;
1670 }
1671
1672 /**
1673 Test if a second key is the subkey of the first one.
1674
1675 @param key_part First key parts
1676 @param ref_key_part Second key parts
1677 @param ref_key_part_end Last+1 part of the second key
1678
1679 @note
1680 Second key MUST be shorter than the first one.
1681
1682 @retval
1683 1 is a subkey
1684 @retval
1685 0 no sub key
1686 */
1687
1688 inline bool
is_subkey(KEY_PART_INFO * key_part,KEY_PART_INFO * ref_key_part,KEY_PART_INFO * ref_key_part_end)1689 is_subkey(KEY_PART_INFO *key_part, KEY_PART_INFO *ref_key_part,
1690 KEY_PART_INFO *ref_key_part_end)
1691 {
1692 for (; ref_key_part < ref_key_part_end; key_part++, ref_key_part++)
1693 if (!key_part->field->eq(ref_key_part->field))
1694 return 0;
1695 return 1;
1696 }
1697
1698
1699 /**
1700 Test if REF_OR_NULL optimization will be used if the specified
1701 ref_key is used for REF-access to 'tab'
1702
1703 @retval
1704 true JT_REF_OR_NULL will be used
1705 @retval
1706 false no JT_REF_OR_NULL access
1707 */
1708
1709 static bool
is_ref_or_null_optimized(const JOIN_TAB * tab,uint ref_key)1710 is_ref_or_null_optimized(const JOIN_TAB *tab, uint ref_key)
1711 {
1712 if (tab->keyuse())
1713 {
1714 const Key_use *keyuse= tab->keyuse();
1715 while (keyuse->key != ref_key && keyuse->table_ref == tab->table_ref)
1716 keyuse++;
1717
1718 const table_map const_tables= tab->join()->const_table_map;
1719 while (keyuse->key == ref_key && keyuse->table_ref == tab->table_ref)
1720 {
1721 if (!(keyuse->used_tables & ~const_tables))
1722 {
1723 if (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)
1724 return true;
1725 }
1726 keyuse++;
1727 }
1728 }
1729 return false;
1730 }
1731
1732
1733 /**
1734 Test if we can use one of the 'usable_keys' instead of 'ref' key
1735 for sorting.
1736
1737 @param ref Number of key, used for WHERE clause
1738 @param usable_keys Keys for testing
1739
1740 @return
1741 - MAX_KEY If we can't use other key
1742 - the number of found key Otherwise
1743 */
1744
1745 static uint
test_if_subkey(ORDER * order,JOIN_TAB * tab,uint ref,uint ref_key_parts,const key_map * usable_keys)1746 test_if_subkey(ORDER *order, JOIN_TAB *tab, uint ref, uint ref_key_parts,
1747 const key_map *usable_keys)
1748 {
1749 uint nr;
1750 uint min_length= (uint) ~0;
1751 uint best= MAX_KEY;
1752 TABLE *table= tab->table();
1753 KEY_PART_INFO *ref_key_part= table->key_info[ref].key_part;
1754 KEY_PART_INFO *ref_key_part_end= ref_key_part + ref_key_parts;
1755
1756 for (nr= 0 ; nr < table->s->keys ; nr++)
1757 {
1758 if (usable_keys->is_set(nr) &&
1759 table->key_info[nr].key_length < min_length &&
1760 table->key_info[nr].user_defined_key_parts >= ref_key_parts &&
1761 is_subkey(table->key_info[nr].key_part, ref_key_part,
1762 ref_key_part_end) &&
1763 !is_ref_or_null_optimized(tab, nr) &&
1764 test_if_order_by_key(order, table, nr))
1765 {
1766 min_length= table->key_info[nr].key_length;
1767 best= nr;
1768 }
1769 }
1770 return best;
1771 }
1772
1773
1774 /**
1775 It is not obvious to see that test_if_skip_sort_order() never changes the
1776 plan if no_changes is true. So we double-check: creating an instance of this
1777 class saves some important access-path-related information of the current
1778 table; when the instance is destroyed, the latest access-path information is
1779 compared with saved data.
1780 */
1781
1782 class Plan_change_watchdog
1783 {
1784 #ifndef NDEBUG
1785 public:
1786 /**
1787 @param tab_arg table whose access path is being determined
1788 @param no_changes whether a change to the access path is allowed
1789 */
Plan_change_watchdog(const JOIN_TAB * tab_arg,const bool no_changes_arg)1790 Plan_change_watchdog(const JOIN_TAB *tab_arg, const bool no_changes_arg)
1791 {
1792 // Only to keep gcc 4.1.2-44 silent about uninitialized variables
1793 quick= NULL;
1794 quick_index= 0;
1795 if (no_changes_arg)
1796 {
1797 tab= tab_arg;
1798 type= tab->type();
1799 if ((quick= tab->quick()))
1800 quick_index= quick->index;
1801 use_quick= tab->use_quick;
1802 ref_key= tab->ref().key;
1803 ref_key_parts= tab->ref().key_parts;
1804 index= tab->index();
1805 }
1806 else
1807 {
1808 tab= NULL;
1809 // Only to keep gcc 4.1.2-44 silent about uninitialized variables
1810 type= JT_UNKNOWN;
1811 quick= NULL;
1812 ref_key= ref_key_parts= index= 0;
1813 use_quick= QS_NONE;
1814 }
1815 }
~Plan_change_watchdog()1816 ~Plan_change_watchdog()
1817 {
1818 if (tab == NULL)
1819 return;
1820 // changes are not allowed, we verify:
1821 assert(tab->type() == type);
1822 assert(tab->quick() == quick);
1823 assert((quick == NULL) || tab->quick()->index == quick_index);
1824 assert(tab->use_quick == use_quick);
1825 assert(tab->ref().key == ref_key);
1826 assert(tab->ref().key_parts == ref_key_parts);
1827 assert(tab->index() == index);
1828 }
1829 private:
1830 const JOIN_TAB *tab; ///< table, or NULL if changes are allowed
1831 enum join_type type; ///< copy of tab->type()
1832 // "Range / index merge" info
1833 const QUICK_SELECT_I *quick; ///< copy of tab->select->quick
1834 uint quick_index; ///< copy of tab->select->quick->index
1835 enum quick_type use_quick; ///< copy of tab->use_quick
1836 // "ref access" info
1837 int ref_key; ///< copy of tab->ref().key
1838 uint ref_key_parts;/// copy of tab->ref().key_parts
1839 // Other index-related info
1840 uint index; ///< copy of tab->index
1841 #else // in non-debug build, empty class
1842 public:
1843 Plan_change_watchdog(const JOIN_TAB *tab_arg, const bool no_changes_arg) {}
1844 #endif
1845 };
1846
1847
1848 /**
1849 Test if we can skip ordering by using an index.
1850
1851 If the current plan is to use an index that provides ordering, the
1852 plan will not be changed. Otherwise, if an index can be used, the
1853 JOIN_TAB / tab->select struct is changed to use the index.
1854
1855 The index must cover all fields in <order>, or it will not be considered.
1856
1857 @param tab NULL or JOIN_TAB of the accessed table
1858 @param order Linked list of ORDER BY arguments
1859 @param select_limit LIMIT value, or HA_POS_ERROR if no limit
1860 @param no_changes No changes will be made to the query plan.
1861 @param map key_map of applicable indexes.
1862 @param clause_type "ORDER BY" etc for printing in optimizer trace
1863
1864 @todo
1865 - sergeyp: Results of all index merge selects actually are ordered
1866 by clustered PK values.
1867
1868 @note
1869 This function may change tmp_table_param.precomputed_group_by. This
1870 affects how create_tmp_table() treats aggregation functions, so
1871 count_field_types() must be called again to make sure this is taken
1872 into consideration.
1873
1874 @retval
1875 0 We have to use filesort to do the sorting
1876 @retval
1877 1 We can use an index.
1878 */
1879
1880 static bool
test_if_skip_sort_order(JOIN_TAB * tab,ORDER * order,ha_rows select_limit,const bool no_changes,const key_map * map,const char * clause_type)1881 test_if_skip_sort_order(JOIN_TAB *tab, ORDER *order, ha_rows select_limit,
1882 const bool no_changes, const key_map *map,
1883 const char *clause_type)
1884 {
1885 int ref_key;
1886 uint ref_key_parts= 0;
1887 int order_direction= 0;
1888 uint used_key_parts;
1889 TABLE *const table= tab->table();
1890 JOIN *const join= tab->join();
1891 THD *const thd= join->thd;
1892 QUICK_SELECT_I *const save_quick= tab->quick();
1893 int best_key= -1;
1894 bool set_up_ref_access_to_key= false;
1895 bool can_skip_sorting= false; // used as return value
1896 int changed_key= -1;
1897 DBUG_ENTER("test_if_skip_sort_order");
1898
1899 /* Check that we are always called with first non-const table */
1900 assert((uint)tab->idx() == join->const_tables);
1901
1902 Plan_change_watchdog watchdog(tab, no_changes);
1903
1904 /* Sorting a single row can always be skipped */
1905 if (tab->type() == JT_EQ_REF ||
1906 tab->type() == JT_CONST ||
1907 tab->type() == JT_SYSTEM)
1908 {
1909 DBUG_RETURN(1);
1910 }
1911
1912 /*
1913 Check if FT index can be used to retrieve result in the required order.
1914 It is possible if ordering is on the first non-constant table.
1915 */
1916 if (join->order && join->simple_order)
1917 {
1918 /*
1919 Check if ORDER is DESC, ORDER BY is a single MATCH function.
1920 */
1921 Item_func_match *ft_func= test_if_ft_index_order(order);
1922 /*
1923 Two possible cases when we can skip sort order:
1924 1. FT_SORTED must be set(Natural mode, no ORDER BY).
1925 2. If FT_SORTED flag is not set then
1926 the engine should support deferred sorting. Deferred sorting means
1927 that sorting is postponed utill the start of index reading(InnoDB).
1928 In this case we set FT_SORTED flag here to let the engine know that
1929 internal sorting is needed.
1930 */
1931 if (ft_func && ft_func->ft_handler && ft_func->ordered_result())
1932 {
1933 /*
1934 FT index scan is used, so the only additional requirement is
1935 that ORDER BY MATCH function is the same as the function that
1936 is used for FT index.
1937 */
1938 if (tab->type() == JT_FT &&
1939 ft_func->eq(tab->position()->key->val, true))
1940 {
1941 ft_func->set_hints(join, FT_SORTED, select_limit, false);
1942 DBUG_RETURN(true);
1943 }
1944 /*
1945 No index is used, it's possible to use FT index for ORDER BY if
1946 LIMIT is present and does not exceed count of the records in FT index
1947 and there is no WHERE condition since a condition may potentially
1948 require more rows to be fetch from FT index.
1949 */
1950 else if (!tab->condition() &&
1951 select_limit != HA_POS_ERROR &&
1952 select_limit <= ft_func->get_count())
1953 {
1954 /* test_if_ft_index_order() always returns master MATCH function. */
1955 assert(!ft_func->master);
1956 /* ref is not set since there is no WHERE condition */
1957 assert(tab->ref().key == -1);
1958
1959 /*Make EXPLAIN happy */
1960 tab->set_type(JT_FT);
1961 tab->ref().key= ft_func->key;
1962 tab->ref().key_parts= 0;
1963 tab->set_index(ft_func->key);
1964 tab->set_ft_func(ft_func);
1965
1966 /* Setup FT handler */
1967 ft_func->set_hints(join, FT_SORTED, select_limit, true);
1968 ft_func->join_key= true;
1969 table->file->ft_handler= ft_func->ft_handler;
1970 DBUG_RETURN(true);
1971 }
1972 }
1973 }
1974
1975 /*
1976 Keys disabled by ALTER TABLE ... DISABLE KEYS should have already
1977 been taken into account.
1978 */
1979 key_map usable_keys= *map;
1980
1981 for (ORDER *tmp_order=order; tmp_order ; tmp_order=tmp_order->next)
1982 {
1983 Item *item= (*tmp_order->item)->real_item();
1984 if (item->type() != Item::FIELD_ITEM)
1985 {
1986 usable_keys.clear_all();
1987 DBUG_RETURN(0);
1988 }
1989 usable_keys.intersect(((Item_field*) item)->field->part_of_sortkey);
1990 if (usable_keys.is_clear_all())
1991 DBUG_RETURN(0); // No usable keys
1992 }
1993 if (tab->type() == JT_REF_OR_NULL || tab->type() == JT_FT)
1994 DBUG_RETURN(0);
1995
1996 ref_key= -1;
1997 /* Test if constant range in WHERE */
1998 if (tab->type() == JT_REF)
1999 {
2000 assert(tab->ref().key >= 0 && tab->ref().key_parts);
2001 ref_key= tab->ref().key;
2002 ref_key_parts= tab->ref().key_parts;
2003 }
2004 else if (tab->type() == JT_RANGE || tab->type() == JT_INDEX_MERGE)
2005 {
2006 // Range found by opt_range
2007 int quick_type= tab->quick()->get_type();
2008 /*
2009 assume results are not ordered when index merge is used
2010 TODO: sergeyp: Results of all index merge selects actually are ordered
2011 by clustered PK values.
2012 */
2013
2014 if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE ||
2015 quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
2016 quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT)
2017 DBUG_RETURN(0);
2018 ref_key= tab->quick()->index;
2019 ref_key_parts= tab->quick()->used_key_parts;
2020 }
2021 else if (tab->type() == JT_INDEX_SCAN)
2022 {
2023 // The optimizer has decided to use an index scan.
2024 ref_key= tab->index();
2025 ref_key_parts= actual_key_parts(&table->key_info[tab->index()]);
2026 }
2027
2028 Opt_trace_context * const trace= &thd->opt_trace;
2029 Opt_trace_object trace_wrapper(trace);
2030 Opt_trace_object
2031 trace_skip_sort_order(trace, "reconsidering_access_paths_for_index_ordering");
2032 trace_skip_sort_order.add_alnum("clause", clause_type);
2033 Opt_trace_array trace_steps(trace, "steps");
2034
2035 if (ref_key >= 0)
2036 {
2037 /*
2038 We come here when ref/index scan/range scan access has been set
2039 up for this table. Do not change access method if ordering is
2040 provided already.
2041 */
2042 if (!usable_keys.is_set(ref_key))
2043 {
2044 /*
2045 We come here when ref_key is not among usable_keys, try to find a
2046 usable prefix key of that key.
2047 */
2048 uint new_ref_key;
2049 /*
2050 If using index only read, only consider other possible index only
2051 keys
2052 */
2053 if (table->covering_keys.is_set(ref_key))
2054 usable_keys.intersect(table->covering_keys);
2055
2056 if ((new_ref_key= test_if_subkey(order, tab, ref_key, ref_key_parts,
2057 &usable_keys)) < MAX_KEY)
2058 {
2059 /* Found key that can be used to retrieve data in sorted order */
2060 if (tab->ref().key >= 0)
2061 {
2062 /*
2063 We'll use ref access method on key new_ref_key. The actual change
2064 is done further down in this function where we update the plan.
2065 */
2066 set_up_ref_access_to_key= true;
2067 }
2068 else if (!no_changes)
2069 {
2070 /*
2071 The range optimizer constructed QUICK_RANGE for ref_key, and
2072 we want to use instead new_ref_key as the index. We can't
2073 just change the index of the quick select, because this may
          result in an inconsistent QUICK_SELECT object. Below we
          create a new QUICK_SELECT from scratch so that all its
          parameters are set correctly by the range optimizer.
2077
2078 Note that the range optimizer is NOT called if
2079 no_changes==true. This reason is that the range optimizer
2080 cannot find a QUICK that can return ordered result unless
2081 index access (ref or index scan) is also able to do so
2082 (which test_if_order_by_key () will tell).
2083 Admittedly, range access may be much more efficient than
2084 e.g. index scan, but the only thing that matters when
2085 no_change==true is the answer to the question: "Is it
2086 possible to avoid sorting if an index is used to access
2087 this table?". The answer does not depend on the outcome of
2088 the range optimizer.
2089 */
2090 key_map new_ref_key_map; // Force the creation of quick select
2091 new_ref_key_map.set_bit(new_ref_key); // only for new_ref_key.
2092
2093 Opt_trace_object trace_wrapper(trace);
2094 Opt_trace_object
2095 trace_recest(trace, "rows_estimation");
2096 trace_recest.add_utf8_table(tab->table_ref).
2097 add_utf8("index", table->key_info[new_ref_key].name);
2098 QUICK_SELECT_I *qck;
2099 const bool no_quick=
2100 test_quick_select(thd, new_ref_key_map,
2101 0, // empty table_map
2102 join->calc_found_rows ?
2103 HA_POS_ERROR :
2104 join->unit->select_limit_cnt,
2105 false, // don't force quick range
2106 order->direction, tab,
2107 // we are after make_join_select():
2108 tab->condition(), &tab->needed_reg, &qck,
2109 tab->table()->force_index) <= 0;
2110 assert(tab->quick() == save_quick);
2111 tab->set_quick(qck);
2112 if (no_quick)
2113 {
2114 can_skip_sorting= false;
2115 goto fix_ICP;
2116 }
2117 }
2118 ref_key= new_ref_key;
2119 changed_key= new_ref_key;
2120 }
2121 }
2122 /* Check if we get the rows in requested sorted order by using the key */
2123 if (usable_keys.is_set(ref_key) &&
2124 (order_direction= test_if_order_by_key(order,table,ref_key,
2125 &used_key_parts)))
2126 goto check_reverse_order;
2127 }
2128 {
2129 /*
2130 There is no ref/index scan/range scan access set up for this
2131 table, or it does not provide the requested ordering. Do a
2132 cost-based search on all keys.
2133 */
2134 uint best_key_parts= 0;
2135 uint saved_best_key_parts= 0;
2136 int best_key_direction= 0;
2137 ha_rows table_records= table->file->stats.records;
2138
2139 /*
2140 If an index scan that cannot provide ordering has been selected
2141 then do not use the index scan key as starting hint to
2142 test_if_cheaper_ordering()
2143 */
2144 const int ref_key_hint= (order_direction == 0 &&
2145 tab->type() == JT_INDEX_SCAN) ? -1 : ref_key;
2146
2147 /*
2148 Does the query have a "FORCE INDEX [FOR GROUP BY] (idx)" (if
2149 clause is group by) or a "FORCE INDEX [FOR ORDER BY] (idx)" (if
2150 clause is order by)?
2151 */
2152 const bool is_group_by= join && join->grouped && order == join->group_list;
2153 const bool is_force_index= table->force_index ||
2154 (is_group_by ? table->force_index_group : table->force_index_order);
2155
2156 /*
2157 Find an ordering index alternative over the chosen plan iff
2158 prefer_ordering_index switch is on. This switch is overridden only when
2159 force index for order/group is specified.
2160 */
2161 if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_PREFER_ORDERING_INDEX) ||
2162 is_force_index)
2163 test_if_cheaper_ordering(tab, order, table, usable_keys,
2164 ref_key_hint,
2165 select_limit,
2166 &best_key, &best_key_direction,
2167 &select_limit, &best_key_parts,
2168 &saved_best_key_parts);
2169
2170 if (best_key < 0)
2171 {
2172 // No usable key has been found
2173 can_skip_sorting= false;
2174 goto fix_ICP;
2175 }
2176
2177 /*
2178 filesort() and join cache are usually faster than reading in
2179 index order and not using join cache. Don't use index scan
2180 unless:
2181 - the user specified FORCE INDEX [FOR {GROUP|ORDER} BY] (have to assume
2182 the user knows what's best)
2183 - the chosen index is clustered primary key (table scan is not cheaper)
2184 */
2185 if (!is_force_index &&
2186 (select_limit >= table_records) &&
2187 (tab->type() == JT_ALL &&
2188 join->primary_tables > join->const_tables + 1) &&
2189 ((unsigned) best_key != table->s->primary_key ||
2190 !table->file->primary_key_is_clustered()))
2191 {
2192 can_skip_sorting= false;
2193 goto fix_ICP;
2194 }
2195
2196 if (table->quick_keys.is_set(best_key) &&
2197 !tab->quick_order_tested.is_set(best_key) &&
2198 best_key != ref_key)
2199 {
2200 tab->quick_order_tested.set_bit(best_key);
2201 Opt_trace_object trace_wrapper(trace);
2202 Opt_trace_object
2203 trace_recest(trace, "rows_estimation");
2204 trace_recest.add_utf8_table(tab->table_ref).
2205 add_utf8("index", table->key_info[best_key].name);
2206
2207 key_map keys_to_use; // Force the creation of quick select
2208 keys_to_use.set_bit(best_key); // only best_key.
2209 QUICK_SELECT_I *qck;
2210 test_quick_select(thd,
2211 keys_to_use,
2212 0, // empty table_map
2213 join->calc_found_rows ?
2214 HA_POS_ERROR :
2215 join->unit->select_limit_cnt,
2216 true, // force quick range
2217 order->direction, tab, tab->condition(),
2218 &tab->needed_reg, &qck, tab->table()->force_index);
2219 /*
2220 If tab->quick() pointed to another quick than save_quick, we would
2221 lose access to it and leak memory.
2222 */
2223 assert(tab->quick() == save_quick || tab->quick() == NULL);
2224 tab->set_quick(qck);
2225 if (qck && !no_changes)
2226 tab->set_type(calc_join_type(qck->get_type()));
2227 }
2228 order_direction= best_key_direction;
2229 /*
2230 saved_best_key_parts is actual number of used keyparts found by the
2231 test_if_order_by_key function. It could differ from keyinfo->key_parts,
2232 thus we have to restore it in case of desc order as it affects
2233 QUICK_SELECT_DESC behaviour.
2234 */
2235 used_key_parts= (order_direction == -1) ?
2236 saved_best_key_parts : best_key_parts;
2237 changed_key= best_key;
2238 // We will use index scan or range scan:
2239 set_up_ref_access_to_key= false;
2240 }
2241
2242 check_reverse_order:
2243 assert(order_direction != 0);
2244
2245 if (order_direction == -1) // If ORDER BY ... DESC
2246 {
2247 if (tab->quick())
2248 {
2249 /*
2250 Don't reverse the sort order, if it's already done.
2251 (In some cases test_if_order_by_key() can be called multiple times
2252 */
2253 if (tab->quick()->reverse_sorted())
2254 {
2255 can_skip_sorting= true;
2256 goto fix_ICP;
2257 }
2258
2259 if (tab->quick()->reverse_sort_possible())
2260 can_skip_sorting= true;
2261 else
2262 {
2263 can_skip_sorting= false;
2264 goto fix_ICP;
2265 }
2266 }
2267 else
2268 {
2269 // Other index access (ref or scan) poses no problem
2270 can_skip_sorting= true;
2271 }
2272 }
2273 else
2274 {
2275 // ORDER BY ASC poses no problem
2276 can_skip_sorting= true;
2277 }
2278
2279 assert(can_skip_sorting);
2280
2281 /*
2282 Update query plan with access pattern for doing
2283 ordered access according to what we have decided
2284 above.
2285 */
2286 if (!no_changes) // We are allowed to update QEP
2287 {
2288 if (set_up_ref_access_to_key)
2289 {
2290 /*
2291 We'll use ref access method on key changed_key. In general case
2292 the index search tuple for changed_ref_key will be different (e.g.
2293 when one index is defined as (part1, part2, ...) and another as
2294 (part1, part2(N), ...) and the WHERE clause contains
2295 "part1 = const1 AND part2=const2".
2296 So we build tab->ref() from scratch here.
2297 */
2298 Key_use *keyuse= tab->keyuse();
2299 while (keyuse->key != (uint)changed_key &&
2300 keyuse->table_ref == tab->table_ref)
2301 keyuse++;
2302
2303 if (create_ref_for_key(join, tab, keyuse, tab->prefix_tables()))
2304 {
2305 can_skip_sorting= false;
2306 goto fix_ICP;
2307 }
2308
2309 assert(tab->type() != JT_REF_OR_NULL && tab->type() != JT_FT);
2310
2311 // Changing the key makes filter_effect obsolete
2312 tab->position()->filter_effect= COND_FILTER_STALE;
2313 }
2314 else if (best_key >= 0)
2315 {
2316 /*
2317 If ref_key used index tree reading only ('Using index' in EXPLAIN),
2318 and best_key doesn't, then revert the decision.
2319 */
2320 if(!table->covering_keys.is_set(best_key))
2321 table->set_keyread(false);
2322 if (!tab->quick() || tab->quick() == save_quick) // created no QUICK
2323 {
2324 // Avoid memory leak:
2325 assert(tab->quick() == save_quick || tab->quick() == NULL);
2326 tab->set_quick(NULL);
2327 tab->set_index(best_key);
2328 tab->set_type(JT_INDEX_SCAN); // Read with index_first(), index_next()
2329 /*
2330 There is a bug. When we change here, e.g. from group_min_max to
2331 index scan: loose index scan expected to read a small number of rows
2332 (jumping through the index), this small number was in
2333 position()->rows_fetched; index scan will read much more, so
2334 rows_fetched should be updated. So should the filtering effect.
2335 It is visible in main.distinct in trunk:
2336 explain SELECT distinct a from t3 order by a desc limit 2;
2337 id select_type table partitions type possible_keys key key_len ref rows filtered Extra
2338 1 SIMPLE t3 NULL index a a 5 NULL 40 25.00 Using index
2339 "rows=40" should be ~200 i.e. # of records in table. Filter should be
2340 100.00 (no WHERE).
2341 */
2342 table->file->ha_index_or_rnd_end();
2343 if (thd->lex->is_explain())
2344 {
2345 /*
2346 @todo this neutralizes add_ref_to_table_cond(); as a result
2347 EXPLAIN shows no "using where" though real SELECT has one.
2348 */
2349 tab->ref().key= -1;
2350 tab->ref().key_parts= 0;
2351 }
2352 tab->position()->filter_effect= COND_FILTER_STALE;
2353 }
2354 else if (tab->type() != JT_ALL)
2355 {
2356 /*
2357 We're about to use a quick access to the table.
2358 We need to change the access method so as the quick access
2359 method is actually used.
2360 */
2361 assert(tab->quick());
2362 assert(tab->quick()->index==(uint)best_key);
2363 tab->set_type(calc_join_type(tab->quick()->get_type()));
2364 tab->use_quick=QS_RANGE;
2365 tab->ref().key= -1;
2366 tab->ref().key_parts=0; // Don't use ref key.
2367 if (tab->quick()->is_loose_index_scan())
2368 join->tmp_table_param.precomputed_group_by= TRUE;
2369 tab->position()->filter_effect= COND_FILTER_STALE;
2370 }
2371 } // best_key >= 0
2372
2373 if (order_direction == -1) // If ORDER BY ... DESC
2374 {
2375 if (tab->quick())
2376 {
2377 /* ORDER BY range_key DESC */
2378 QUICK_SELECT_I *tmp= tab->quick()->make_reverse(used_key_parts);
2379 if (!tmp)
2380 {
2381 /* purecov: begin inspected */
2382 can_skip_sorting= false; // Reverse sort failed -> filesort
2383 goto fix_ICP;
2384 /* purecov: end */
2385 }
2386 if (tab->quick() != tmp && tab->quick() != save_quick)
2387 delete tab->quick();
2388 tab->set_quick(tmp);
2389 tab->set_type(calc_join_type(tmp->get_type()));
2390 tab->position()->filter_effect= COND_FILTER_STALE;
2391 }
2392 else if (tab->type() == JT_REF &&
2393 tab->ref().key_parts <= used_key_parts)
2394 {
2395 /*
2396 SELECT * FROM t1 WHERE a=1 ORDER BY a DESC,b DESC
2397
2398 Use a traversal function that starts by reading the last row
2399 with key part (A) and then traverse the index backwards.
2400 */
2401 tab->reversed_access= true;
2402
2403 /*
2404 The current implementation of join_read_prev_same() does not
2405 work well in combination with ICP and can lead to increased
2406 execution time. Setting changed_key to the current key
2407 (based on that we change the access order for the key) will
2408 ensure that a pushed index condition will be cancelled.
2409 */
2410 changed_key= tab->ref().key;
2411 }
2412 else if (tab->type() == JT_INDEX_SCAN)
2413 tab->reversed_access= true;
2414 }
2415 else if (tab->quick())
2416 tab->quick()->need_sorted_output();
2417
2418 } // QEP has been modified
2419
2420 fix_ICP:
2421 /*
2422 Cleanup:
2423 We may have both a 'tab->quick()' and 'save_quick' (original)
2424 at this point. Delete the one that we won't use.
2425 */
2426 if (can_skip_sorting && !no_changes)
2427 {
2428 if (tab->type() == JT_INDEX_SCAN &&
2429 select_limit < table->file->stats.records)
2430 {
2431 tab->position()->rows_fetched= select_limit;
2432 tab->position()->filter_effect= COND_FILTER_STALE_NO_CONST;
2433 }
2434
2435 // Keep current (ordered) tab->quick()
2436 if (save_quick != tab->quick())
2437 delete save_quick;
2438 }
2439 else
2440 {
2441 // Restore original save_quick
2442 if (tab->quick() != save_quick)
2443 {
2444 delete tab->quick();
2445 tab->set_quick(save_quick);
2446 }
2447 }
2448
2449 trace_steps.end();
2450 Opt_trace_object
2451 trace_change_index(trace, "index_order_summary");
2452 trace_change_index.add_utf8_table(tab->table_ref)
2453 .add("index_provides_order", can_skip_sorting)
2454 .add_alnum("order_direction", order_direction == 1 ? "asc" :
2455 ((order_direction == -1) ? "desc" :
2456 "undefined"));
2457
2458 if (changed_key >= 0)
2459 {
2460 // switching to another index
2461 // Should be no pushed conditions at this point
2462 assert(!table->file->pushed_idx_cond);
2463 if (unlikely(trace->is_started()))
2464 {
2465 trace_change_index.add_utf8("index", table->key_info[changed_key].name);
2466 trace_change_index.add("plan_changed", !no_changes);
2467 if (!no_changes)
2468 trace_change_index.add_alnum("access_type", join_type_str[tab->type()]);
2469 }
2470 }
2471 else if (unlikely(trace->is_started()))
2472 {
2473 trace_change_index.add_utf8("index",
2474 ref_key >= 0 ?
2475 table->key_info[ref_key].name : "unknown");
2476 trace_change_index.add("plan_changed", false);
2477 }
2478 DBUG_RETURN(can_skip_sorting);
2479 }
2480
2481
2482 /**
2483 Prune partitions for all tables of a join (query block).
2484
2485 Requires that tables have been locked.
2486
2487 @returns false if success, true if error
2488 */
2489
prune_table_partitions()2490 bool JOIN::prune_table_partitions()
2491 {
2492 assert(select_lex->partitioned_table_count);
2493
2494 for (TABLE_LIST *tbl= select_lex->leaf_tables; tbl; tbl= tbl->next_leaf)
2495 {
2496 /*
2497 If tbl->embedding!=NULL that means that this table is in the inner
2498 part of the nested outer join, and we can't do partition pruning
2499 (TODO: check if this limitation can be lifted.
2500 This also excludes semi-joins. Is that intentional?)
2501 This will try to prune non-static conditions, which can
2502 be used after the tables are locked.
2503 */
2504 if (!tbl->embedding)
2505 {
2506 Item *prune_cond= tbl->join_cond_optim() ?
2507 tbl->join_cond_optim() : where_cond;
2508 if (prune_partitions(thd, tbl->table, prune_cond))
2509 return true;
2510 }
2511 }
2512
2513 return false;
2514 }
2515
2516
2517 /**
2518 A helper function to check whether it's better to use range than ref.
2519
2520 @details
2521 Heuristic: Switch from 'ref' to 'range' access if 'range'
2522 access can utilize more keyparts than 'ref' access. Conditions
2523 for doing switching:
2524
2525 1) Range access is possible Or tab->dodgy_ref_cost is set.
2526 2) This function is not relevant for FT, since there is no range access for
2527 that type of index.
2528 3) Used parts of key shouldn't have nullable parts & ref_or_null isn't used.
2529 4) 'ref' access depends on a constant, not a value read from a
2530 table earlier in the join sequence.
2531
2532 Rationale: if 'ref' depends on a value from another table,
2533 the join condition is not used to limit the rows read by
2534 'range' access (that would require dynamic range - 'Range
2535 checked for each record'). In other words, if 'ref' depends
2536 on a value from another table, we have a query with
2537 conditions of the form
2538
2539 this_table.idx_col1 = other_table.col AND <<- used by 'ref'
2540 this_table.idx_col1 OP <const> AND <<- used by 'range'
2541 this_table.idx_col2 OP <const> AND ... <<- used by 'range'
2542
2543 and an index on (idx_col1,idx_col2,...). But the fact that
2544 'range' access uses more keyparts does not mean that it is
2545 more selective than 'ref' access because these access types
2546 utilize different parts of the query condition. We
2547 therefore trust the cost based choice made by
2548 best_access_path() instead of forcing a heuristic choice
2549 here.
2550 5a) 'ref' access and 'range' access uses the same index.
2551 5b) 'range' access uses more keyparts than 'ref' access.
2552
2553 OR
2554
2555 6) Ref has borrowed the index estimate from range and created a cost
2556 estimate (See Optimize_table_order::find_best_ref). This will be a
       problem if range built its row estimate using a larger number of key
2558 parts than ref. In such a case, shift to range access over the same
2559 index. So run the range optimizer with that index as the only choice.
2560 (Condition 5 is not relevant here since it has been tested in
2561 find_best_ref.)
2562
2563 @param thd THD To re-run range optimizer.
2564 @param tab JOIN_TAB To check the above conditions.
2565
2566 @return true Range is better than ref
2567 @return false Ref is better or switch isn't possible
2568
2569 @todo: This decision should rather be made in best_access_path()
2570 */
2571
can_switch_from_ref_to_range(THD * thd,JOIN_TAB * tab)2572 static bool can_switch_from_ref_to_range(THD *thd, JOIN_TAB *tab)
2573 {
2574 if ((tab->quick() || tab->dodgy_ref_cost) && // 1)
2575 tab->position()->key->keypart != FT_KEYPART) // 2)
2576 {
2577 uint keyparts= 0, length= 0;
2578 table_map dep_map= 0;
2579 bool maybe_null= false;
2580
2581 calc_length_and_keyparts(tab->position()->key, tab,
2582 tab->position()->key->key,
2583 tab->prefix_tables(), NULL, &length, &keyparts,
2584 &dep_map, &maybe_null);
2585 if (maybe_null || // 3)
2586 dep_map) // 4)
2587 return false;
2588
2589 if (tab->quick() &&
2590 tab->position()->key->key == tab->quick()->index) // 5a)
2591 return length < tab->quick()->max_used_key_length; // 5b)
2592 else if (tab->dodgy_ref_cost) // 6)
2593 {
2594 key_map new_ref_key_map;
2595 new_ref_key_map.set_bit(tab->position()->key->key);
2596
2597 Opt_trace_context * const trace= &thd->opt_trace;
2598 Opt_trace_object trace_wrapper(trace);
2599
2600 Opt_trace_object
2601 can_switch(trace, "check_if_range_uses_more_keyparts_than_ref");
2602 Opt_trace_object
2603 trace_setup_cond(trace, "rerunning_range_optimizer_for_single_index");
2604
2605 QUICK_SELECT_I *qck;
2606 if (test_quick_select(thd, new_ref_key_map,
2607 0, // empty table_map
2608 tab->join()->row_limit,
2609 false, // don't force quick range
2610 ORDER::ORDER_NOT_RELEVANT,
2611 tab,
2612 tab->join_cond() ? tab->join_cond() :
2613 tab->join()->where_cond,
2614 &tab->needed_reg,
2615 &qck, true) > 0)
2616 {
2617 if (length < qck->max_used_key_length)
2618 {
2619 delete tab->quick();
2620 tab->set_quick(qck);
2621 return true;
2622 }
2623 else
2624 {
2625 Opt_trace_object (trace, "access_type_unchanged").
2626 add("ref_key_length", length).
2627 add("range_key_length", qck->max_used_key_length);
2628 delete qck;
2629 }
2630 }
2631 }
2632 }
2633 return false;
2634 }
2635
2636 /**
2637 An utility function - apply heuristics and optimize access methods to tables.
2638 Currently this function can change REF to RANGE and ALL to INDEX scan if
2639 latter is considered to be better (not cost-based) than the former.
2640 @note Side effect - this function could set 'Impossible WHERE' zero
2641 result.
2642 */
2643
adjust_access_methods()2644 void JOIN::adjust_access_methods()
2645 {
2646 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
2647 for (uint i= const_tables; i < tables; i++)
2648 {
2649 JOIN_TAB *const tab= best_ref[i];
2650 TABLE_LIST *const tl= tab->table_ref;
2651
2652 if (tab->type() == JT_ALL)
2653 {
2654 /*
2655 It's possible to speedup query by switching from full table scan to
2656 the scan of covering index, due to less data being read.
2657 Prerequisites for this are:
2658 1) Keyread (i.e index only scan) is allowed (table isn't updated/deleted
2659 from)
2660 2) Covering indexes are available
2661 3) This isn't a derived table/materialized view
2662 */
2663 if (!tab->table()->no_keyread && // 1
2664 !tab->table()->covering_keys.is_clear_all() && // 2
2665 !tl->uses_materialization()) // 3
2666 {
2667 /*
2668 It has turned out that the change commented out below, while speeding
2669 things up for disk-bound loads, slows them down for cases when the data
2670 is in disk cache (see BUG#35850):
2671 // See bug #26447: "Using the clustered index for a table scan
2672 // is always faster than using a secondary index".
2673 if (table->s->primary_key != MAX_KEY &&
2674 table->file->primary_key_is_clustered())
2675 tab->index= table->s->primary_key;
2676 else
2677 tab->index=find_shortest_key(table, & table->covering_keys);
2678 */
2679 if (tab->position()->sj_strategy != SJ_OPT_LOOSE_SCAN)
2680 tab->set_index(find_shortest_key(tab->table(), &tab->table()->covering_keys));
2681 tab->set_type(JT_INDEX_SCAN); // Read with index_first / index_next
2682 // From table scan to index scan, thus filter effect needs no recalc.
2683 }
2684 else if (!tab->table()->no_keyread && !tl->uses_materialization())
2685 {
2686 assert(tab->table()->covering_keys.is_clear_all());
2687 if (tab->position()->sj_strategy != SJ_OPT_LOOSE_SCAN)
2688 {
2689 key_map clustering_keys;
2690 for (uint i= 0; i < tab->table()->s->keys; i++)
2691 {
2692 if (tab->keys().is_set(i)
2693 && tab->table()->file->index_flags(i, 0, 0)
2694 & HA_CLUSTERED_INDEX)
2695 clustering_keys.set_bit(i);
2696 }
2697 uint index= find_shortest_key(tab->table(), &clustering_keys);
2698 if (index != MAX_KEY)
2699 {
2700 tab->set_type(JT_INDEX_SCAN);
2701 tab->set_index(index);
2702 }
2703 }
2704 }
2705 }
2706 else if (tab->type() == JT_REF)
2707 {
2708 if (can_switch_from_ref_to_range(thd, tab))
2709 {
2710 tab->set_type(JT_RANGE);
2711
2712 Opt_trace_context * const trace= &thd->opt_trace;
2713 Opt_trace_object wrapper(trace);
2714 Opt_trace_object (trace, "access_type_changed").
2715 add_utf8_table(tl).
2716 add_utf8("index",
2717 tab->table()->key_info[tab->position()->key->key].name).
2718 add_alnum("old_type", "ref").
2719 add_alnum("new_type", join_type_str[tab->type()]).
2720 add_alnum("cause", "uses_more_keyparts");
2721
2722 tab->use_quick= QS_RANGE;
2723 tab->position()->filter_effect= COND_FILTER_STALE;
2724 }
2725 else
2726 {
2727 // Cleanup quick, REF/REF_OR_NULL/EQ_REF, will be clarified later
2728 delete tab->quick();
2729 tab->set_quick(NULL);
2730 }
2731 }
2732 // Ensure AM consistency
2733 assert(!(tab->quick() && (tab->type() == JT_REF || tab->type() == JT_ALL)));
2734 assert((tab->type() != JT_RANGE && tab->type() != JT_INDEX_MERGE) ||
2735 tab->quick());
2736 if (!tab->const_keys.is_clear_all() &&
2737 tab->table()->reginfo.impossible_range &&
2738 ((i == const_tables && tab->type() == JT_REF) ||
2739 ((tab->type() == JT_ALL || tab->type() == JT_RANGE ||
2740 tab->type() == JT_INDEX_MERGE || tab->type() == JT_INDEX_SCAN) &&
2741 tab->use_quick != QS_RANGE)) &&
2742 !tab->table_ref->is_inner_table_of_outer_join())
2743 zero_result_cause=
2744 "Impossible WHERE noticed after reading const tables";
2745 }
2746 }
2747
2748
alloc_jtab_array(THD * thd,uint table_count)2749 static JOIN_TAB *alloc_jtab_array(THD *thd, uint table_count)
2750 {
2751 JOIN_TAB *t= new (thd->mem_root) JOIN_TAB[table_count];
2752 if (!t)
2753 return NULL; /* purecov: inspected */
2754
2755 QEP_shared *qs= new (thd->mem_root) QEP_shared[table_count];
2756 if (!qs)
2757 return NULL; /* purecov: inspected */
2758
2759 for (uint i= 0; i < table_count; ++i)
2760 t[i].set_qs(qs++);
2761
2762 return t;
2763 }
2764
2765
2766 /**
2767 Set up JOIN_TAB structs according to the picked join order in best_positions.
2768 This allocates execution structures so may be called only after we have the
2769 very final plan. It must be called after
2770 Optimize_table_order::fix_semijoin_strategies().
2771
2772 @return False if success, True if error
2773
2774 @details
2775 - create join->join_tab array and copy from existing JOIN_TABs in join order
2776 - create helper structs for materialized semi-join handling
2777 - finalize semi-join strategy choices
2778 - Number of intermediate tables "tmp_tables" is calculated.
2779 - "tables" and "primary_tables" are recalculated.
2780 - for full and index scans info of estimated # of records is updated.
2781 - in a helper function:
2782 - all heuristics are applied and the final access method type is picked
      for each join_tab (only test_if_skip_sort_order() could override it)
2784 - AM consistency is ensured (e.g only range and index merge are allowed
2785 to have quick select set).
2786 - if "Impossible WHERE" is detected - appropriate zero_result_cause is
2787 set.
2788
2789 Notice that intermediate tables will not have a POSITION reference; and they
2790 will not have a TABLE reference before the final stages of code generation.
2791
2792 @todo the block which sets tab->type should move to adjust_access_methods
2793 for unification.
2794 */
2795
get_best_combination()2796 bool JOIN::get_best_combination()
2797 {
2798 DBUG_ENTER("JOIN::get_best_combination");
2799
  // At this point "tables" and "primary_tables" represent the same:
2801 assert(tables == primary_tables);
2802
2803 /*
2804 Allocate additional space for tmp tables.
2805 Number of plan nodes:
2806 # of regular input tables (including semi-joined ones) +
2807 # of semi-join nests for materialization +
2808 1? + // For GROUP BY
2809 1? + // For DISTINCT
2810 1? + // For aggregation functions aggregated in outer query
2811 // when used with distinct
2812 1? + // For ORDER BY
2813 1? // buffer result
2814 Up to 2 tmp tables are actually used, but it's hard to tell exact number
2815 at this stage.
2816 */
2817 uint num_tmp_tables= (group_list ? 1 : 0) +
2818 (select_distinct ?
2819 (tmp_table_param.outer_sum_func_count ? 2 : 1) : 0) +
2820 (order ? 1 : 0) +
2821 (select_lex->active_options() &
2822 (SELECT_BIG_RESULT | OPTION_BUFFER_RESULT) ? 1 : 0);
2823 if (num_tmp_tables > 2)
2824 num_tmp_tables= 2;
2825
2826 /*
2827 Rearrange queries with materialized semi-join nests so that the semi-join
2828 nest is replaced with a reference to a materialized temporary table and all
2829 materialized subquery tables are placed after the intermediate tables.
2830 After the following loop, "inner_target" is the position of the first
2831 subquery table (if any). "outer_target" is the position of first outer
2832 table, and will later be used to track the position of any materialized
2833 temporary tables.
2834 */
2835 const bool has_semijoin= !select_lex->sj_nests.is_empty();
2836 uint outer_target= 0;
2837 uint inner_target= primary_tables + num_tmp_tables;
2838 uint sjm_nests= 0;
2839
2840 if (has_semijoin)
2841 {
2842 for (uint tableno= 0; tableno < primary_tables; )
2843 {
2844 if (sj_is_materialize_strategy(best_positions[tableno].sj_strategy))
2845 {
2846 sjm_nests++;
2847 inner_target-= (best_positions[tableno].n_sj_tables - 1);
2848 tableno+= best_positions[tableno].n_sj_tables;
2849 }
2850 else
2851 tableno++;
2852 }
2853 }
2854
2855 JOIN_TAB *tmp_join_tabs= NULL;
2856 if (sjm_nests + num_tmp_tables)
2857 {
2858 // join_tab array only has "primary_tables" tables. We need those more:
2859 if (!(tmp_join_tabs= alloc_jtab_array(thd, sjm_nests + num_tmp_tables)))
2860 DBUG_RETURN(true); /* purecov: inspected */
2861 }
2862
2863 // To check that we fill the array correctly: fill it with zeros first
2864 memset(best_ref, 0, sizeof(JOIN_TAB*) * (primary_tables + sjm_nests +
2865 num_tmp_tables));
2866
2867 int sjm_index= tables; // Number assigned to materialized temporary table
2868 int remaining_sjm_inner= 0;
2869 bool err= false;
2870 for (uint tableno= 0; tableno < tables; tableno++)
2871 {
2872 POSITION *const pos= best_positions + tableno;
2873 if (has_semijoin && sj_is_materialize_strategy(pos->sj_strategy))
2874 {
2875 assert(outer_target < inner_target);
2876
2877 TABLE_LIST *const sj_nest= pos->table->emb_sj_nest;
2878
2879 // Handle this many inner tables of materialized semi-join
2880 remaining_sjm_inner= pos->n_sj_tables;
2881
2882 /*
2883 If we fail in some allocation below, we cannot bail out immediately;
2884 that would put us in a difficult situation to clean up; imagine we
2885 have planned this layout:
2886 outer1 - sj_mat_tmp1 - outer2 - sj_mat_tmp2 - outer3
2887 We have successfully filled a JOIN_TAB for sj_mat_tmp1, and are
2888 failing to fill a JOIN_TAB for sj_mat_tmp2 (OOM). So we want to quit
2889 this function, which will lead to cleanup functions.
2890 But sj_mat_tmp1 is in this->best_ref only, outer3 is in this->join_tab
2891 only: what is the array to traverse for cleaning up? What is the
2892 number of tables to loop over?
2893 So: if we fail in the present loop, we record the error but continue
2894 filling best_ref; when it's fully filled, bail out, because then
2895 best_ref can be used as reliable array for cleaning up.
2896 */
2897 JOIN_TAB *const tab= tmp_join_tabs++;
2898 best_ref[outer_target]= tab;
2899 tab->set_join(this);
2900 tab->set_idx(outer_target);
2901
2902 /*
2903 Up to this point there cannot be a failure. JOIN_TAB has been filled
2904 enough to be clean-able.
2905 */
2906
2907 Semijoin_mat_exec *const sjm_exec=
2908 new (thd->mem_root)
2909 Semijoin_mat_exec(sj_nest,
2910 (pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN),
2911 remaining_sjm_inner, outer_target, inner_target);
2912
2913 tab->set_sj_mat_exec(sjm_exec);
2914
2915 if (!sjm_exec ||
2916 setup_semijoin_materialized_table(tab, sjm_index,
2917 pos, best_positions + sjm_index))
2918 err= true; /* purecov: inspected */
2919
2920 outer_target++;
2921 sjm_index++;
2922 }
2923 /*
2924 Locate join_tab target for the table we are considering.
2925 (remaining_sjm_inner becomes negative for non-SJM tables, this can be
2926 safely ignored).
2927 */
2928 const uint target=
2929 (remaining_sjm_inner--) > 0 ? inner_target++ : outer_target++;
2930 JOIN_TAB *const tab= pos->table;
2931
2932 best_ref[target]= tab;
2933 tab->set_idx(target);
2934 tab->set_position(pos);
2935 TABLE *const table= tab->table();
2936 if (tab->type() != JT_CONST && tab->type() != JT_SYSTEM)
2937 {
2938 if (pos->sj_strategy == SJ_OPT_LOOSE_SCAN && tab->quick() &&
2939 tab->quick()->index != pos->loosescan_key)
2940 {
2941 /*
2942 We must use the duplicate-eliminating index, so this QUICK is not
2943 an option.
2944 */
2945 delete tab->quick();
2946 tab->set_quick(NULL);
2947 }
2948 if (!pos->key)
2949 {
2950 if (tab->quick())
2951 tab->set_type(calc_join_type(tab->quick()->get_type()));
2952 else
2953 tab->set_type(JT_ALL);
2954 }
2955 else
2956 // REF or RANGE, clarify later when prefix tables are set for JOIN_TABs
2957 tab->set_type(JT_REF);
2958 }
2959 assert(tab->type() != JT_UNKNOWN);
2960
2961 assert(table->reginfo.join_tab == tab);
2962 if (!tab->join_cond())
2963 table->reginfo.not_exists_optimize= false; // Only with LEFT JOIN
2964 map2table[tab->table_ref->tableno()]= tab;
2965 }
2966
2967 // Count the materialized semi-join tables as regular input tables
2968 tables+= sjm_nests + num_tmp_tables;
2969 // Set the number of non-materialized tables:
2970 primary_tables= outer_target;
2971
2972 /*
2973 Between the last outer table or sj-mat tmp table, and the first sj-mat
2974 inner table, there may be 2 slots for sort/group/etc tmp tables:
2975 */
2976 for (uint i= 0; i < num_tmp_tables; ++i)
2977 {
2978 const uint idx= outer_target + i;
2979 tmp_join_tabs->set_join(this);
2980 tmp_join_tabs->set_idx(idx);
2981 assert(best_ref[idx] == NULL); // verify that not overwriting
2982 best_ref[idx]= tmp_join_tabs++;
2983 /*
2984 note that set_table() cannot be called yet. We may not even use this
2985 JOIN_TAB in the end, it's dummy at the moment. Which can be tested with
2986 "position()!=NULL".
2987 */
2988 }
2989
2990 // make array unreachable: should walk JOIN_TABs by best_ref now
2991 join_tab= NULL;
2992
2993 if (err)
2994 DBUG_RETURN(true); /* purecov: inspected */
2995
2996 if (has_semijoin)
2997 {
2998 set_semijoin_info();
2999
3000 // Update equalities and keyuses after having added SJ materialization
3001 if (update_equalities_for_sjm())
3002 DBUG_RETURN(true);
3003 }
3004 if (!plan_is_const())
3005 {
3006 // Assign map of "available" tables to all tables belonging to query block
3007 set_prefix_tables();
3008 adjust_access_methods();
3009 }
3010 // Calculate outer join info
3011 if (select_lex->outer_join)
3012 make_outerjoin_info();
3013
3014 // sjm is no longer needed, trash it. To reuse it, reset its members!
3015 List_iterator<TABLE_LIST> sj_list_it(select_lex->sj_nests);
3016 TABLE_LIST *sj_nest;
3017 while ((sj_nest= sj_list_it++))
3018 TRASH(static_cast<void*>(&sj_nest->nested_join->sjm),
3019 sizeof(sj_nest->nested_join->sjm));
3020
3021 DBUG_RETURN(false);
3022 }
3023
3024
3025 /*
3026 Revise usage of join buffer for the specified table and the whole nest
3027
3028 SYNOPSIS
3029 revise_cache_usage()
3030 tab join table for which join buffer usage is to be revised
3031
3032 DESCRIPTION
3033 The function revise the decision to use a join buffer for the table 'tab'.
3034 If this table happened to be among the inner tables of a nested outer join/
3035 semi-join the functions denies usage of join buffers for all of them
3036
3037 RETURN
3038 none
3039 */
3040
3041 static
revise_cache_usage(JOIN_TAB * join_tab)3042 void revise_cache_usage(JOIN_TAB *join_tab)
3043 {
3044 plan_idx first_inner= join_tab->first_inner();
3045 JOIN *const join= join_tab->join();
3046 if (first_inner != NO_PLAN_IDX)
3047 {
3048 plan_idx end_tab= join_tab->idx();
3049 for (first_inner= join_tab->first_inner();
3050 first_inner != NO_PLAN_IDX;
3051 first_inner= join->best_ref[first_inner]->first_upper())
3052 {
3053 for (plan_idx i= end_tab-1; i >= first_inner; --i)
3054 join->best_ref[i]->set_use_join_cache(JOIN_CACHE::ALG_NONE);
3055 end_tab= first_inner;
3056 }
3057 }
3058 else if (join_tab->get_sj_strategy() == SJ_OPT_FIRST_MATCH)
3059 {
3060 plan_idx first_sj_inner= join_tab->first_sj_inner();
3061 for (plan_idx i= join_tab->idx()-1; i >= first_sj_inner; --i)
3062 {
3063 JOIN_TAB *tab= join->best_ref[i];
3064 if (tab->first_sj_inner() == first_sj_inner)
3065 tab->set_use_join_cache(JOIN_CACHE::ALG_NONE);
3066 }
3067 }
3068 else
3069 join_tab->set_use_join_cache(JOIN_CACHE::ALG_NONE);
3070 assert(join->qep_tab == NULL);
3071 }
3072
3073
3074 /**
3075 Set up join buffering for a specified table, if possible.
3076
3077 @param tab joined table to check join buffer usage for
3078 @param join join for which the check is performed
3079 @param no_jbuf_after don't use join buffering after table with this number
3080
3081 @return false if successful, true if error.
3082 Currently, allocation errors for join cache objects are ignored,
3083 and regular execution is chosen silently.
3084
3085 @details
3086 The function finds out whether the table 'tab' can be joined using a join
3087 buffer. This check is performed after the best execution plan for 'join'
3088 has been chosen. If the function decides that a join buffer can be employed
3089 then it selects the most appropriate join cache type, which later will
3090 be instantiated by init_join_cache().
3091 If it has already been decided to not use join buffering for this table,
3092 no action is taken.
3093
3094 Often it is already decided that join buffering will be used earlier in
3095 the optimization process, and this will also ensure that the most correct
3096 cost for the operation is calculated, and hence the probability of
3097 choosing an optimal join plan is higher. However, some join buffering
3098 decisions cannot currently be taken before this stage, hence we need this
3099 function to decide the most accurate join buffering strategy.
3100
3101 @todo Long-term it is the goal that join buffering strategy is decided
3102 when the plan is selected.
3103
3104 The result of the check and the type of the join buffer to be used
3105 depend on:
3106 - the access method to access rows of the joined table
3107 - whether the join table is an inner table of an outer join or semi-join
3108 - the optimizer_switch settings for join buffering
3109 - the join 'options'.
3110 In any case join buffer is not used if the number of the joined table is
3111 greater than 'no_jbuf_after'.
3112
3113 If block_nested_loop is turned on, and if all other criteria for using
3114 join buffering is fulfilled (see below), then join buffer is used
3115 for any join operation (inner join, outer join, semi-join) with 'JT_ALL'
3116 access method. In that case, a JOIN_CACHE_BNL type is always employed.
3117
3118 If an index is used to access rows of the joined table and batched_key_access
3119 is on, then a JOIN_CACHE_BKA type is employed. (Unless debug flag,
3120 test_bka unique, is set, then a JOIN_CACHE_BKA_UNIQUE type is employed
3121 instead.)
3122
3123 If the function decides that a join buffer can be used to join the table
3124 'tab' then it sets @c tab->use_join_cache to reflect the chosen algorithm.
3125
3126 @note
3127 For a nested outer join/semi-join, currently, we either use join buffers for
3128 all inner tables or for none of them.
3129
3130 @todo
3131 Support BKA inside SJ-Materialization nests. When doing this, we'll need
3132 to only store sj-inner tables in the join buffer.
3133 #if 0
3134 JOIN_TAB *first_tab= join->join_tab+join->const_tables;
3135 uint n_tables= i-join->const_tables;
3136 / *
3137 We normally put all preceding tables into the join buffer, except
3138 for the constant tables.
3139 If we're inside a semi-join materialization nest, e.g.
3140
3141 outer_tbl1 outer_tbl2 ( inner_tbl1, inner_tbl2 ) ...
3142 ^-- we're here
3143
3144 then we need to put into the join buffer only the tables from
3145 within the nest.
3146 * /
3147 if (i >= first_sjm_table && i < last_sjm_table)
3148 {
3149 n_tables= i - first_sjm_table; // will be >0 if we got here
3150 first_tab= join->join_tab + first_sjm_table;
3151 }
3152 #endif
3153
3154 */
3155
setup_join_buffering(JOIN_TAB * tab,JOIN * join,uint no_jbuf_after)3156 static bool setup_join_buffering(JOIN_TAB *tab, JOIN *join, uint no_jbuf_after)
3157 {
3158 ASSERT_BEST_REF_IN_JOIN_ORDER(join);
3159 Cost_estimate cost;
3160 ha_rows rows;
3161 uint bufsz= 4096;
3162 uint join_cache_flags = 0;
3163 const bool bnl_on= hint_table_state(join->thd, tab->table_ref->table,
3164 BNL_HINT_ENUM, OPTIMIZER_SWITCH_BNL);
3165 const bool bka_on= hint_table_state(join->thd, tab->table_ref->table,
3166 BKA_HINT_ENUM, OPTIMIZER_SWITCH_BKA);
3167
3168 const uint tableno= tab->idx();
3169 const uint tab_sj_strategy= tab->get_sj_strategy();
3170 bool use_bka_unique= false;
3171 DBUG_EXECUTE_IF("test_bka_unique", use_bka_unique= true;);
3172
3173 /*
3174 If all key_parts are null_rejecting, the MultiRangeRowIterator will
3175 eliminate all NULL values in the key set, such that
3176 HA_MRR_NO_NULL_ENDPOINTS can be promised.
3177 */
3178 const key_part_map keypart_map = make_prev_keypart_map(tab->ref().key_parts);
3179 if (tab->ref().null_rejecting == keypart_map) {
3180 join_cache_flags |= HA_MRR_NO_NULL_ENDPOINTS;
3181 }
3182
3183 // Set preliminary join cache setting based on decision from greedy search
3184 tab->set_use_join_cache(tab->position()->use_join_buffer ?
3185 JOIN_CACHE::ALG_BNL : JOIN_CACHE::ALG_NONE);
3186
3187 if (tableno == join->const_tables)
3188 {
3189 assert(tab->use_join_cache() == JOIN_CACHE::ALG_NONE);
3190 return false;
3191 }
3192
3193 if (!(bnl_on || bka_on))
3194 goto no_join_cache;
3195
3196 /*
3197 psergey-todo: why the below when execution code seems to handle the
3198 "range checked for each record" case?
3199 */
3200 if (tab->use_quick == QS_DYNAMIC_RANGE)
3201 goto no_join_cache;
3202
3203 /* No join buffering if prevented by no_jbuf_after */
3204 if (tableno > no_jbuf_after)
3205 goto no_join_cache;
3206
3207 /*
3208 An inner table of an outer join nest must not use join buffering if
3209 the first inner table of that outer join nest does not use join buffering.
3210 This condition is not handled by earlier optimizer stages.
3211 */
3212 if (tab->first_inner() != NO_PLAN_IDX &&
3213 tab->first_inner() != tab->idx() &&
3214 !join->best_ref[tab->first_inner()]->use_join_cache())
3215 goto no_join_cache;
3216 /*
3217 The first inner table of an outer join nest must not use join buffering
3218 if the tables in the embedding outer join nest do not use join buffering.
3219 This condition is not handled by earlier optimizer stages.
3220 */
3221 if (tab->first_upper() != NO_PLAN_IDX &&
3222 !join->best_ref[tab->first_upper()]->use_join_cache())
3223 goto no_join_cache;
3224
3225 switch (tab_sj_strategy)
3226 {
3227 case SJ_OPT_FIRST_MATCH:
3228 /*
3229 Use join cache with FirstMatch semi-join strategy only when semi-join
3230 contains only one table.
3231 */
3232 if (!tab->is_single_inner_of_semi_join())
3233 {
3234 assert(tab->use_join_cache() == JOIN_CACHE::ALG_NONE);
3235 goto no_join_cache;
3236 }
3237 break;
3238
3239 case SJ_OPT_LOOSE_SCAN:
3240 /* No join buffering if this semijoin nest is handled by loosescan */
3241 assert(tab->use_join_cache() == JOIN_CACHE::ALG_NONE);
3242 goto no_join_cache;
3243
3244 case SJ_OPT_MATERIALIZE_LOOKUP:
3245 case SJ_OPT_MATERIALIZE_SCAN:
3246 /*
3247 The Materialize strategies reuse the join_tab belonging to the
3248 first table that was materialized. Neither table can use join buffering:
3249 - The first table in a join never uses join buffering.
3250 - The join_tab used for looking up a row in the materialized table, or
3251 scanning the rows of a materialized table, cannot use join buffering.
3252 We allow join buffering for the remaining tables of the materialized
3253 semi-join nest.
3254 */
3255 if (tab->first_sj_inner() == tab->idx())
3256 {
3257 assert(tab->use_join_cache() == JOIN_CACHE::ALG_NONE);
3258 goto no_join_cache;
3259 }
3260 break;
3261
3262 case SJ_OPT_DUPS_WEEDOUT:
3263 // This strategy allows the same join buffering as a regular join would.
3264 case SJ_OPT_NONE:
3265 break;
3266 }
3267
3268 /*
3269 The following code prevents use of join buffering when there is an
3270 outer join operation and first match semi-join strategy is used, because:
3271
3272 Outer join needs a "match flag" to track that a row should be
3273 NULL-complemented, such flag being attached to first inner table's cache
3274 (tracks whether the cached row from outer table got a match, in which case
3275 no NULL-complemented row is needed).
3276
3277 FirstMatch also needs a "match flag", such flag is attached to sj inner
3278 table's cache (tracks whether the cached row from outer table already got
3279 a first match in the sj-inner table, in which case we don't need to join
3280 this cached row again)
3281 - but a row in a cache has only one "match flag"
3282 - so if "sj inner table"=="first inner", there is a problem.
3283 */
3284 if (tab_sj_strategy == SJ_OPT_FIRST_MATCH &&
3285 tab->is_inner_table_of_outer_join())
3286 goto no_join_cache;
3287
3288 switch (tab->type()) {
3289 case JT_ALL:
3290 case JT_INDEX_SCAN:
3291 case JT_RANGE:
3292 case JT_INDEX_MERGE:
3293 if (!bnl_on)
3294 {
3295 assert(tab->use_join_cache() == JOIN_CACHE::ALG_NONE);
3296 goto no_join_cache;
3297 }
3298
3299 tab->set_use_join_cache(JOIN_CACHE::ALG_BNL);
3300 return false;
3301 case JT_SYSTEM:
3302 case JT_CONST:
3303 case JT_REF:
3304 case JT_EQ_REF:
3305 if (!bka_on)
3306 {
3307 assert(tab->use_join_cache() == JOIN_CACHE::ALG_NONE);
3308 goto no_join_cache;
3309 }
3310
3311 /*
3312 Disable BKA for materializable derived tables/views as they aren't
3313 instantiated yet.
3314 */
3315 if (tab->table_ref->uses_materialization())
3316 goto no_join_cache;
3317
3318 /*
3319 Can't use BKA for subquery if dealing with a subquery that can
3320 turn a ref access into a "full scan on NULL key" table scan.
3321
3322 @see Item_in_optimizer::val_int()
3323 @see subselect_single_select_engine::exec()
3324 @see TABLE_REF::cond_guards
3325 @see push_index_cond()
3326
3327 @todo: This choice to not use BKA should be done before making
3328 cost estimates, e.g. in set_join_buffer_properties(). That
3329 happens before cond guards are set up, so instead of doing the
3330 check below, BKA should be disabled if
3331 - We are in an IN subquery, and
3332 - The IN predicate is not a top_level_item, and
3333 - The left_expr of the IN predicate may contain NULL values
3334 (left_expr->maybe_null)
3335 */
3336 if (tab->has_guarded_conds())
3337 goto no_join_cache;
3338
3339 if (tab->table()->covering_keys.is_set(tab->ref().key))
3340 join_cache_flags|= HA_MRR_INDEX_ONLY;
3341 rows= tab->table()->file->multi_range_read_info(tab->ref().key, 10, 20,
3342 &bufsz,
3343 &join_cache_flags, &cost);
3344 /*
3345 Cannot use BKA/BKA_UNIQUE if
3346 1. MRR scan cannot be performed, or
3347 2. MRR default implementation is used
3348 Cannot use BKA if
3349 3. HA_MRR_NO_ASSOCIATION flag is set
3350 */
3351 if ((rows == HA_POS_ERROR) || // 1
3352 (join_cache_flags & HA_MRR_USE_DEFAULT_IMPL) || // 2
3353 ((join_cache_flags & HA_MRR_NO_ASSOCIATION) && // 3
3354 !use_bka_unique))
3355 goto no_join_cache;
3356
3357 if (use_bka_unique)
3358 tab->set_use_join_cache(JOIN_CACHE::ALG_BKA_UNIQUE);
3359 else
3360 tab->set_use_join_cache(JOIN_CACHE::ALG_BKA);
3361
3362 tab->join_cache_flags= join_cache_flags;
3363 return false;
3364 default : ;
3365 }
3366
3367 no_join_cache:
3368 revise_cache_usage(tab);
3369 tab->set_use_join_cache(JOIN_CACHE::ALG_NONE);
3370 return false;
3371 }
3372
3373
/*****************************************************************************
  Simple condition optimizations:
    - If there is a test 'field = const', change all references to 'field'
      into 'const'.
    - Remove all dummy tests such as 'item = item' and 'const op const'.
    - Remove every 'item IS NULL' test when item can never be NULL.
  item->marker should be 0 for all items on entry.
  cond_value is set to FALSE if the condition is impossible (e.g. 1 = 2).
*****************************************************************************/
3382
3383 class COND_CMP :public ilink<COND_CMP> {
3384 public:
operator new(size_t size)3385 static void *operator new(size_t size)
3386 {
3387 return sql_alloc(size);
3388 }
operator delete(void * ptr MY_ATTRIBUTE ((unused)),size_t size MY_ATTRIBUTE ((unused)))3389 static void operator delete(void *ptr MY_ATTRIBUTE((unused)),
3390 size_t size MY_ATTRIBUTE((unused)))
3391 { TRASH(ptr, size); }
3392
3393 Item *and_level;
3394 Item_func *cmp_func;
COND_CMP(Item * a,Item_func * b)3395 COND_CMP(Item *a,Item_func *b) :and_level(a),cmp_func(b) {}
3396 };
3397
3398
3399 /**
3400 Find the multiple equality predicate containing a field.
3401
3402 The function retrieves the multiple equalities accessed through
3403 the cond_equal structure from current level and up looking for
3404 an equality containing a field. It stops retrieval as soon as the equality
3405 is found and set up inherited_fl to TRUE if it's found on upper levels.
3406
3407 @param cond_equal multiple equalities to search in
3408 @param item_field field to look for
3409 @param[out] inherited_fl set up to TRUE if multiple equality is found
3410 on upper levels (not on current level of
3411 cond_equal)
3412
3413 @return
3414 - Item_equal for the found multiple equality predicate if a success;
3415 - NULL otherwise.
3416 */
3417
find_item_equal(COND_EQUAL * cond_equal,Item_field * item_field,bool * inherited_fl)3418 Item_equal *find_item_equal(COND_EQUAL *cond_equal, Item_field *item_field,
3419 bool *inherited_fl)
3420 {
3421 Item_equal *item= 0;
3422 bool in_upper_level= FALSE;
3423 while (cond_equal)
3424 {
3425 List_iterator_fast<Item_equal> li(cond_equal->current_level);
3426 while ((item= li++))
3427 {
3428 if (item->contains(item_field->field))
3429 goto finish;
3430 }
3431 in_upper_level= TRUE;
3432 cond_equal= cond_equal->upper_levels;
3433 }
3434 in_upper_level= FALSE;
3435 finish:
3436 *inherited_fl= in_upper_level;
3437 return item;
3438 }
3439
3440
3441 /**
3442 Get the best field substitution for a given field.
3443
3444 If the field is member of a multiple equality, look up that equality
3445 and return the most appropriate field. Usually this is the equivalenced
3446 field belonging to the outer-most table in the join order, but
3447 @see Item_field::get_subst_item() for details.
3448 Otherwise, return the same field.
3449
3450 @param item_field The field that we are seeking a substitution for.
3451 @param cond_equal multiple equalities to search in
3452
3453 @return The substituted field.
3454 */
3455
get_best_field(Item_field * item_field,COND_EQUAL * cond_equal)3456 Item_field *get_best_field(Item_field *item_field, COND_EQUAL *cond_equal)
3457 {
3458 bool dummy;
3459 Item_equal *item_eq= find_item_equal(cond_equal, item_field, &dummy);
3460 if (!item_eq)
3461 return item_field;
3462
3463 return item_eq->get_subst_item(item_field);
3464 }
3465
3466
3467 /**
3468 Check whether an equality can be used to build multiple equalities.
3469
3470 This function first checks whether the equality (left_item=right_item)
3471 is a simple equality i.e. one that equates a field with another field
3472 or a constant (field=field_item or field=const_item).
3473 If this is the case the function looks for a multiple equality
3474 in the lists referenced directly or indirectly by cond_equal inferring
3475 the given simple equality. If it doesn't find any, it builds a multiple
3476 equality that covers the predicate, i.e. the predicate can be inferred
3477 from this multiple equality.
3478 The built multiple equality could be obtained in such a way:
3479 create a binary multiple equality equivalent to the predicate, then
3480 merge it, if possible, with one of old multiple equalities.
3481 This guarantees that the set of multiple equalities covering equality
3482 predicates will be minimal.
3483
3484 EXAMPLE:
3485 For the where condition
3486 @code
3487 WHERE a=b AND b=c AND
3488 (b=2 OR f=e)
3489 @endcode
3490 the check_equality will be called for the following equality
3491 predicates a=b, b=c, b=2 and f=e.
3492 - For a=b it will be called with *cond_equal=(0,[]) and will transform
3493 *cond_equal into (0,[Item_equal(a,b)]).
3494 - For b=c it will be called with *cond_equal=(0,[Item_equal(a,b)])
3495 and will transform *cond_equal into CE=(0,[Item_equal(a,b,c)]).
3496 - For b=2 it will be called with *cond_equal=(ptr(CE),[])
3497 and will transform *cond_equal into (ptr(CE),[Item_equal(2,a,b,c)]).
3498 - For f=e it will be called with *cond_equal=(ptr(CE), [])
3499 and will transform *cond_equal into (ptr(CE),[Item_equal(f,e)]).
3500
3501 @note
3502 Now only fields that have the same type definitions (verified by
3503 the Field::eq_def method) are placed to the same multiple equalities.
3504 Because of this some equality predicates are not eliminated and
3505 can be used in the constant propagation procedure.
3506 We could weaken the equality test as soon as at least one of the
3507 equal fields is to be equal to a constant. It would require a
3508 more complicated implementation: we would have to store, in
3509 general case, its own constant for each fields from the multiple
3510 equality. But at the same time it would allow us to get rid
3511 of constant propagation completely: it would be done by the call
3512 to build_equal_items_for_cond.
3513
3514 The implementation does not follow exactly the above rules to
3515 build a new multiple equality for the equality predicate.
3516 If it processes the equality of the form field1=field2, it
3517 looks for multiple equalities me1 containing field1 and me2 containing
3518 field2. If only one of them is found the function expands it with
3519 the lacking field. If multiple equalities for both fields are
3520 found they are merged. If both searches fail a new multiple equality
3521 containing just field1 and field2 is added to the existing
3522 multiple equalities.
3523 If the function processes the predicate of the form field1=const,
3524 it looks for a multiple equality containing field1. If found, the
3525 function checks the constant of the multiple equality. If the value
3526 is unknown, it is setup to const. Otherwise the value is compared with
3527 const and the evaluation of the equality predicate is performed.
3528 When expanding/merging equality predicates from the upper levels
3529 the function first copies them for the current level. It looks
3530 acceptable, as this happens rarely. The implementation without
3531 copying would be much more complicated.
3532
3533 @param thd Thread handler
3534 @param left_item left term of the equality to be checked
3535 @param right_item right term of the equality to be checked
3536 @param item equality item if the equality originates from a condition
3537 predicate, 0 if the equality is the result of row
3538 elimination
3539 @param cond_equal multiple equalities that must hold together with the
3540 equality
3541 @param[out] simple_equality
3542 true if the predicate is a simple equality predicate
3543 to be used for building multiple equalities
3544 false otherwise
3545
3546 @returns false if success, true if error
3547 */
3548
check_simple_equality(THD * thd,Item * left_item,Item * right_item,Item * item,COND_EQUAL * cond_equal,bool * simple_equality)3549 static bool check_simple_equality(THD *thd,
3550 Item *left_item, Item *right_item,
3551 Item *item, COND_EQUAL *cond_equal,
3552 bool *simple_equality)
3553 {
3554 *simple_equality= false;
3555
3556 if (left_item->type() == Item::REF_ITEM &&
3557 down_cast<Item_ref *>(left_item)->ref_type() == Item_ref::VIEW_REF)
3558 {
3559 if (down_cast<Item_ref *>(left_item)->depended_from)
3560 return false;
3561 left_item= left_item->real_item();
3562 }
3563 if (right_item->type() == Item::REF_ITEM &&
3564 down_cast<Item_ref *>(right_item)->ref_type() == Item_ref::VIEW_REF)
3565 {
3566 if (down_cast<Item_ref *>(right_item)->depended_from)
3567 return false;
3568 right_item= right_item->real_item();
3569 }
3570 Item_field *left_item_field, *right_item_field;
3571
3572 if (left_item->type() == Item::FIELD_ITEM &&
3573 right_item->type() == Item::FIELD_ITEM &&
3574 (left_item_field= down_cast<Item_field *>(left_item)) &&
3575 (right_item_field= down_cast<Item_field *>(right_item)) &&
3576 !left_item_field->depended_from &&
3577 !right_item_field->depended_from)
3578 {
3579 /* The predicate the form field1=field2 is processed */
3580
3581 Field *const left_field= left_item_field->field;
3582 Field *const right_field= right_item_field->field;
3583
3584 if (!left_field->eq_def(right_field))
3585 return false;
3586
3587 /* Search for multiple equalities containing field1 and/or field2 */
3588 bool left_copyfl, right_copyfl;
3589 Item_equal *left_item_equal=
3590 find_item_equal(cond_equal, left_item_field, &left_copyfl);
3591 Item_equal *right_item_equal=
3592 find_item_equal(cond_equal, right_item_field, &right_copyfl);
3593
3594 /* As (NULL=NULL) != TRUE we can't just remove the predicate f=f */
3595 if (left_field->eq(right_field)) /* f = f */
3596 {
3597 *simple_equality= !(left_field->maybe_null() && !left_item_equal);
3598 return false;
3599 }
3600
3601 if (left_item_equal && left_item_equal == right_item_equal)
3602 {
3603 /*
3604 The equality predicate is inference of one of the existing
3605 multiple equalities, i.e the condition is already covered
3606 by upper level equalities
3607 */
3608 *simple_equality= true;
3609 return false;
3610 }
3611
3612 /* Copy the found multiple equalities at the current level if needed */
3613 if (left_copyfl)
3614 {
3615 /* left_item_equal of an upper level contains left_item */
3616 left_item_equal= new Item_equal(left_item_equal);
3617 if (left_item_equal == NULL)
3618 return true;
3619 cond_equal->current_level.push_back(left_item_equal);
3620 }
3621 if (right_copyfl)
3622 {
3623 /* right_item_equal of an upper level contains right_item */
3624 right_item_equal= new Item_equal(right_item_equal);
3625 if (right_item_equal == NULL)
3626 return true;
3627 cond_equal->current_level.push_back(right_item_equal);
3628 }
3629
3630 if (left_item_equal)
3631 {
3632 /* left item was found in the current or one of the upper levels */
3633 if (! right_item_equal)
3634 left_item_equal->add(down_cast<Item_field *>(right_item));
3635 else
3636 {
3637 /* Merge two multiple equalities forming a new one */
3638 if (left_item_equal->merge(thd, right_item_equal))
3639 return true;
3640 /* Remove the merged multiple equality from the list */
3641 List_iterator<Item_equal> li(cond_equal->current_level);
3642 while ((li++) != right_item_equal) ;
3643 li.remove();
3644 }
3645 }
3646 else
3647 {
3648 /* left item was not found neither the current nor in upper levels */
3649 if (right_item_equal)
3650 {
3651 right_item_equal->add(down_cast<Item_field *>(left_item));
3652 }
3653 else
3654 {
3655 /* None of the fields was found in multiple equalities */
3656 Item_equal *item_equal=
3657 new Item_equal(down_cast<Item_field *>(left_item),
3658 down_cast<Item_field *>(right_item));
3659 if (item_equal == NULL)
3660 return true;
3661 cond_equal->current_level.push_back(item_equal);
3662 }
3663 }
3664 *simple_equality= true;
3665 return false;
3666 }
3667
3668 {
3669 /* The predicate of the form field=const/const=field is processed */
3670 Item *const_item= 0;
3671 Item_field *field_item= 0;
3672 if (left_item->type() == Item::FIELD_ITEM &&
3673 (field_item= down_cast<Item_field *>(left_item)) &&
3674 field_item->depended_from == NULL &&
3675 right_item->const_item())
3676 {
3677 const_item= right_item;
3678 }
3679 else if (right_item->type() == Item::FIELD_ITEM &&
3680 (field_item= down_cast<Item_field *>(right_item)) &&
3681 field_item->depended_from == NULL &&
3682 left_item->const_item())
3683 {
3684 const_item= left_item;
3685 }
3686
3687 if (const_item &&
3688 field_item->result_type() == const_item->result_type())
3689 {
3690 if (field_item->result_type() == STRING_RESULT)
3691 {
3692 const CHARSET_INFO *cs= field_item->field->charset();
3693 if (!item)
3694 {
3695 Item_func_eq *const eq_item= new Item_func_eq(left_item, right_item);
3696 if (eq_item == NULL || eq_item->set_cmp_func())
3697 return true;
3698 eq_item->quick_fix_field();
3699 item= eq_item;
3700 }
3701 if ((cs != down_cast<Item_func *>(item)->compare_collation()) ||
3702 !cs->coll->propagate(cs, 0, 0))
3703 return false;
3704 }
3705
3706 bool copyfl;
3707 Item_equal *item_equal= find_item_equal(cond_equal, field_item, ©fl);
3708 if (copyfl)
3709 {
3710 item_equal= new Item_equal(item_equal);
3711 if (item_equal == NULL)
3712 return true;
3713 cond_equal->current_level.push_back(item_equal);
3714 }
3715 if (item_equal)
3716 {
3717 /*
3718 The flag cond_false will be set to 1 after this, if item_equal
3719 already contains a constant and its value is not equal to
3720 the value of const_item.
3721 */
3722 if (item_equal->add(thd, const_item, field_item))
3723 return true;
3724 }
3725 else
3726 {
3727 item_equal= new Item_equal(const_item, field_item);
3728 if (item_equal == NULL)
3729 return true;
3730 cond_equal->current_level.push_back(item_equal);
3731 }
3732 *simple_equality= true;
3733 return false;
3734 }
3735 }
3736 return false;
3737 }
3738
3739
3740 /**
3741 Convert row equalities into a conjunction of regular equalities.
3742
3743 The function converts a row equality of the form (E1,...,En)=(E'1,...,E'n)
3744 into a list of equalities E1=E'1,...,En=E'n. For each of these equalities
3745 Ei=E'i the function checks whether it is a simple equality or a row
3746 equality. If it is a simple equality it is used to expand multiple
3747 equalities of cond_equal. If it is a row equality it converted to a
3748 sequence of equalities between row elements. If Ei=E'i is neither a
3749 simple equality nor a row equality the item for this predicate is added
3750 to eq_list.
3751
3752 @param thd thread handle
3753 @param left_row left term of the row equality to be processed
3754 @param right_row right term of the row equality to be processed
3755 @param cond_equal multiple equalities that must hold together with the
3756 predicate
3757 @param eq_list results of conversions of row equalities that are not
3758 simple enough to form multiple equalities
3759 @param[out] simple_equality
3760 true if the row equality is composed of only
3761 simple equalities.
3762
3763 @returns false if conversion succeeded, true if any error.
3764 */
3765
check_row_equality(THD * thd,Item * left_row,Item_row * right_row,COND_EQUAL * cond_equal,List<Item> * eq_list,bool * simple_equality)3766 static bool check_row_equality(THD *thd, Item *left_row, Item_row *right_row,
3767 COND_EQUAL *cond_equal, List<Item>* eq_list,
3768 bool *simple_equality)
3769 {
3770 *simple_equality= false;
3771 uint n= left_row->cols();
3772 for (uint i= 0 ; i < n; i++)
3773 {
3774 bool is_converted;
3775 Item *left_item= left_row->element_index(i);
3776 Item *right_item= right_row->element_index(i);
3777 if (left_item->type() == Item::ROW_ITEM &&
3778 right_item->type() == Item::ROW_ITEM)
3779 {
3780 if (check_row_equality(thd,
3781 down_cast<Item_row *>(left_item),
3782 down_cast<Item_row *>(right_item),
3783 cond_equal, eq_list, &is_converted))
3784 return true;
3785 if (!is_converted)
3786 thd->lex->current_select()->cond_count++;
3787 }
3788 else
3789 {
3790 if (check_simple_equality(thd, left_item, right_item, 0, cond_equal,
3791 &is_converted))
3792 return true;
3793 thd->lex->current_select()->cond_count++;
3794 }
3795
3796 if (!is_converted)
3797 {
3798 Item_func_eq *const eq_item= new Item_func_eq(left_item, right_item);
3799 if (eq_item == NULL)
3800 return true;
3801 if (eq_item->set_cmp_func())
3802 {
3803 // Failed to create cmp func -> not only simple equalitities
3804 return true;
3805 }
3806 eq_item->quick_fix_field();
3807 eq_list->push_back(eq_item);
3808 }
3809 }
3810 *simple_equality= true;
3811 return false;
3812 }
3813
3814
3815 /**
3816 Eliminate row equalities and form multiple equalities predicates.
3817
3818 This function checks whether the item is a simple equality
3819 i.e. the one that equates a field with another field or a constant
3820 (field=field_item or field=constant_item), or, a row equality.
3821 For a simple equality the function looks for a multiple equality
3822 in the lists referenced directly or indirectly by cond_equal inferring
3823 the given simple equality. If it doesn't find any, it builds/expands
3824 multiple equality that covers the predicate.
3825 Row equalities are eliminated substituted for conjunctive regular
3826 equalities which are treated in the same way as original equality
3827 predicates.
3828
3829 @param thd thread handle
3830 @param item predicate to process
3831 @param cond_equal multiple equalities that must hold together with the
3832 predicate
3833 @param eq_list results of conversions of row equalities that are not
3834 simple enough to form multiple equalities
3835 @param[out] equality
3836 true if re-writing rules have been applied
3837 false otherwise, i.e.
3838 if the predicate is not an equality, or
3839 if the equality is neither a simple nor a row equality
3840
3841 @returns false if success, true if error
3842
3843 @note If the equality was created by IN->EXISTS, it may be removed later by
3844 subquery materialization. So we don't mix this possibly temporary equality
3845 with others; if we let it go into a multiple-equality (Item_equal), then we
3846 could not remove it later. There is however an exception: if the outer
3847 expression is a constant, it is safe to leave the equality even in
3848 materialization; all it can do is preventing NULL/FALSE distinction but if
3849 such distinction mattered the equality would be in a triggered condition so
3850 we would not come to this function. And injecting constants is good because
3851 it makes the materialized table smaller.
3852 */
3853
check_equality(THD * thd,Item * item,COND_EQUAL * cond_equal,List<Item> * eq_list,bool * equality)3854 static bool check_equality(THD *thd, Item *item, COND_EQUAL *cond_equal,
3855 List<Item> *eq_list, bool *equality)
3856 {
3857 *equality= false;
3858 Item_func *item_func;
3859 if (item->type() == Item::FUNC_ITEM &&
3860 (item_func= down_cast<Item_func *>(item))->functype() ==
3861 Item_func::EQ_FUNC)
3862 {
3863 Item *left_item= item_func->arguments()[0];
3864 Item *right_item= item_func->arguments()[1];
3865
3866 if (item->created_by_in2exists() && !left_item->const_item())
3867 return false; // See note above
3868
3869 if (left_item->type() == Item::ROW_ITEM &&
3870 right_item->type() == Item::ROW_ITEM)
3871 {
3872 thd->lex->current_select()->cond_count--;
3873 return check_row_equality(thd,
3874 down_cast<Item_row *>(left_item),
3875 down_cast<Item_row *>(right_item),
3876 cond_equal, eq_list, equality);
3877 }
3878 else
3879 return check_simple_equality(thd, left_item, right_item, item, cond_equal,
3880 equality);
3881 }
3882
3883 return false;
3884 }
3885
3886
3887 /**
3888 Replace all equality predicates in a condition by multiple equality items.
3889
3890 At each 'and' level the function detects items for equality predicates
3891 and replaces them by a set of multiple equality items of class Item_equal,
3892 taking into account inherited equalities from upper levels.
3893 If an equality predicate is used not in a conjunction it's just
3894 replaced by a multiple equality predicate.
3895 For each 'and' level the function set a pointer to the inherited
3896 multiple equalities in the cond_equal field of the associated
3897 object of the type Item_cond_and.
3898 The function also traverses the cond tree and for each field reference
3899 sets a pointer to the multiple equality item containing the field, if there
3900 is any. If this multiple equality equates fields to a constant the
3901 function replaces the field reference by the constant in the cases
3902 when the field is not of a string type or when the field reference is
3903 just an argument of a comparison predicate.
3904 The function also determines the maximum number of members in
3905 equality lists of each Item_cond_and object assigning it to
3906 thd->lex->current_select()->max_equal_elems.
3907
3908 @note
3909 Multiple equality predicate =(f1,..fn) is equivalent to the conjuction of
3910 f1=f2, .., fn-1=fn. It substitutes any inference from these
3911 equality predicates that is equivalent to the conjunction.
3912 Thus, =(a1,a2,a3) can substitute for ((a1=a3) AND (a2=a3) AND (a2=a1)) as
3913 it is equivalent to ((a1=a2) AND (a2=a3)).
3914 The function always makes a substitution of all equality predicates occured
3915 in a conjunction for a minimal set of multiple equality predicates.
3916 This set can be considered as a canonical representation of the
3917 sub-conjunction of the equality predicates.
3918 E.g. (t1.a=t2.b AND t2.b>5 AND t1.a=t3.c) is replaced by
3919 (=(t1.a,t2.b,t3.c) AND t2.b>5), not by
3920 (=(t1.a,t2.b) AND =(t1.a,t3.c) AND t2.b>5);
3921 while (t1.a=t2.b AND t2.b>5 AND t3.c=t4.d) is replaced by
3922 (=(t1.a,t2.b) AND =(t3.c=t4.d) AND t2.b>5),
3923 but if additionally =(t4.d,t2.b) is inherited, it
3924 will be replaced by (=(t1.a,t2.b,t3.c,t4.d) AND t2.b>5)
3925
3926 The function performs the substitution in a recursive descent of
3927 the condition tree, passing to the next AND level a chain of multiple
3928 equality predicates which have been built at the upper levels.
3929 The Item_equal items built at the level are attached to other
3930 non-equality conjuncts as a sublist. The pointer to the inherited
3931 multiple equalities is saved in the and condition object (Item_cond_and).
3932 This chain allows us for any field reference occurence to easily find a
3933 multiple equality that must be held for this occurence.
3934 For each AND level we do the following:
3935 - scan it for all equality predicate (=) items
3936 - join them into disjoint Item_equal() groups
3937 - process the included OR conditions recursively to do the same for
3938 lower AND levels.
3939
3940 We need to do things in this order as lower AND levels need to know about
3941 all possible Item_equal objects in upper levels.
3942
3943 @param thd thread handle
3944 @param cond condition(expression) where to make replacement
3945 @param[out] retcond returned condition
3946 @param inherited path to all inherited multiple equality items
3947 @param do_inherit whether or not to inherit equalities from other parts
3948 of the condition
3949
3950 @returns false if success, true if error
3951 */
3952
build_equal_items_for_cond(THD * thd,Item * cond,Item ** retcond,COND_EQUAL * inherited,bool do_inherit)3953 static bool build_equal_items_for_cond(THD *thd, Item *cond, Item **retcond,
3954 COND_EQUAL *inherited, bool do_inherit)
3955 {
3956 Item_equal *item_equal;
3957 COND_EQUAL cond_equal;
3958 cond_equal.upper_levels= inherited;
3959
3960 if (check_stack_overrun(thd, STACK_MIN_SIZE, NULL))
3961 return true; // Fatal error flag is set!
3962
3963 const enum Item::Type cond_type= cond->type();
3964 if (cond_type == Item::COND_ITEM)
3965 {
3966 List<Item> eq_list;
3967 Item_cond *const item_cond= down_cast<Item_cond *>(cond);
3968 const bool and_level= item_cond->functype() == Item_func::COND_AND_FUNC;
3969 List<Item> *args= item_cond->argument_list();
3970
3971 List_iterator<Item> li(*args);
3972 Item *item;
3973
3974 if (and_level)
3975 {
3976 /*
3977 Retrieve all conjuncts of this level detecting the equality
3978 that are subject to substitution by multiple equality items and
3979 removing each such predicate from the conjunction after having
3980 found/created a multiple equality whose inference the predicate is.
3981 */
3982 while ((item= li++))
3983 {
3984 /*
3985 PS/SP note: we can safely remove a node from AND-OR
3986 structure here because it's restored before each
3987 re-execution of any prepared statement/stored procedure.
3988 */
3989 bool equality;
3990 if (check_equality(thd, item, &cond_equal, &eq_list, &equality))
3991 return true;
3992 if (equality)
3993 li.remove();
3994 }
3995
3996 /*
3997 Check if we eliminated all the predicates of the level, e.g.
3998 (a=a AND b=b AND a=a).
3999 */
4000 if (!args->elements &&
4001 !cond_equal.current_level.elements &&
4002 !eq_list.elements)
4003 {
4004 *retcond= new Item_int((longlong) 1, 1);
4005 return *retcond == NULL;
4006 }
4007
4008 List_iterator_fast<Item_equal> it(cond_equal.current_level);
4009 while ((item_equal= it++))
4010 {
4011 item_equal->fix_length_and_dec();
4012 item_equal->update_used_tables();
4013 set_if_bigger(thd->lex->current_select()->max_equal_elems,
4014 item_equal->members());
4015 }
4016
4017 Item_cond_and *const item_cond_and= down_cast<Item_cond_and *>(cond);
4018 item_cond_and->cond_equal= cond_equal;
4019 inherited= &item_cond_and->cond_equal;
4020 }
4021 /*
4022 Make replacement of equality predicates for lower levels
4023 of the condition expression.
4024 */
4025 li.rewind();
4026 while ((item= li++))
4027 {
4028 Item *new_item;
4029 if (build_equal_items_for_cond(thd, item, &new_item, inherited,
4030 do_inherit))
4031 return true;
4032 if (new_item != item)
4033 {
4034 /* This replacement happens only for standalone equalities */
4035 /*
4036 This is ok with PS/SP as the replacement is done for
4037 arguments of an AND/OR item, which are restored for each
4038 execution of PS/SP.
4039 */
4040 li.replace(new_item);
4041 }
4042 }
4043 if (and_level)
4044 {
4045 args->concat(&eq_list);
4046 args->concat((List<Item> *)&cond_equal.current_level);
4047 }
4048 }
4049 else if (cond->type() == Item::FUNC_ITEM)
4050 {
4051 List<Item> eq_list;
4052 /*
4053 If an equality predicate forms the whole and level,
4054 we call it standalone equality and it's processed here.
4055 E.g. in the following where condition
4056 WHERE a=5 AND (b=5 or a=c)
4057 (b=5) and (a=c) are standalone equalities.
4058 In general we can't leave alone standalone eqalities:
4059 for WHERE a=b AND c=d AND (b=c OR d=5)
4060 b=c is replaced by =(a,b,c,d).
4061 */
4062 bool equality;
4063 if (check_equality(thd, cond, &cond_equal, &eq_list, &equality))
4064 return true;
4065 if (equality)
4066 {
4067 int n= cond_equal.current_level.elements + eq_list.elements;
4068 if (n == 0)
4069 {
4070 *retcond= new Item_int((longlong) 1,1);
4071 return *retcond == NULL;
4072 }
4073 else if (n == 1)
4074 {
4075 if ((item_equal= cond_equal.current_level.pop()))
4076 {
4077 item_equal->fix_length_and_dec();
4078 item_equal->update_used_tables();
4079 set_if_bigger(thd->lex->current_select()->max_equal_elems,
4080 item_equal->members());
4081 *retcond= item_equal;
4082 return false;
4083 }
4084
4085 *retcond= eq_list.pop();
4086 return false;
4087 }
4088 else
4089 {
4090 /*
4091 Here a new AND level must be created. It can happen only
4092 when a row equality is processed as a standalone predicate.
4093 */
4094 Item_cond_and *and_cond= new Item_cond_and(eq_list);
4095 if (and_cond == NULL)
4096 return true;
4097
4098 and_cond->quick_fix_field();
4099 List<Item> *args= and_cond->argument_list();
4100 List_iterator_fast<Item_equal> it(cond_equal.current_level);
4101 while ((item_equal= it++))
4102 {
4103 item_equal->fix_length_and_dec();
4104 item_equal->update_used_tables();
4105 set_if_bigger(thd->lex->current_select()->max_equal_elems,
4106 item_equal->members());
4107 }
4108 and_cond->cond_equal= cond_equal;
4109 args->concat((List<Item> *)&cond_equal.current_level);
4110
4111 *retcond= and_cond;
4112 return false;
4113 }
4114 }
4115
4116 if (do_inherit)
4117 {
4118 /*
4119 For each field reference in cond, not from equal item predicates,
4120 set a pointer to the multiple equality it belongs to (if there is any)
4121 as soon the field is not of a string type or the field reference is
4122 an argument of a comparison predicate.
4123 */
4124 uchar *is_subst_valid= (uchar *) 1;
4125 cond= cond->compile(&Item::subst_argument_checker,
4126 &is_subst_valid,
4127 &Item::equal_fields_propagator,
4128 (uchar *) inherited);
4129 if (cond == NULL)
4130 return true;
4131 }
4132 cond->update_used_tables();
4133 }
4134 *retcond= cond;
4135 return false;
4136 }
4137
4138
4139 /**
4140 Build multiple equalities for a WHERE condition and all join conditions that
4141 inherit these multiple equalities.
4142
4143 The function first applies the build_equal_items_for_cond function
4144 to build all multiple equalities for condition cond utilizing equalities
4145 referred through the parameter inherited. The extended set of
4146 equalities is returned in the structure referred by the cond_equal_ref
4147 parameter. After this the function calls itself recursively for
4148 all join conditions whose direct references can be found in join_list
4149 and who inherit directly the multiple equalities just having built.
4150
4151 @note
4152 The join condition used in an outer join operation inherits all equalities
4153 from the join condition of the embedding join, if there is any, or
4154 otherwise - from the where condition.
4155 This fact is not obvious, but presumably can be proved.
4156 Consider the following query:
4157 @code
4158 SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t1.a=t3.a AND t2.a=t4.a
4159 WHERE t1.a=t2.a;
4160 @endcode
4161 If the join condition in the query inherits =(t1.a,t2.a), then we
4162 can build the multiple equality =(t1.a,t2.a,t3.a,t4.a) that infers
4163 the equality t3.a=t4.a. Although the join condition
4164 t1.a=t3.a AND t2.a=t4.a AND t3.a=t4.a is not equivalent to the one
4165 in the query the latter can be replaced by the former: the new query
4166 will return the same result set as the original one.
4167
4168 Interesting that multiple equality =(t1.a,t2.a,t3.a,t4.a) allows us
4169 to use t1.a=t3.a AND t3.a=t4.a under the join condition:
4170 @code
4171 SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t1.a=t3.a AND t3.a=t4.a
4172 WHERE t1.a=t2.a
4173 @endcode
4174 This query equivalent to:
4175 @code
4176 SELECT * FROM (t1 LEFT JOIN (t3,t4) ON t1.a=t3.a AND t3.a=t4.a),t2
4177 WHERE t1.a=t2.a
4178 @endcode
4179 Similarly the original query can be rewritten to the query:
4180 @code
4181 SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t2.a=t4.a AND t3.a=t4.a
4182 WHERE t1.a=t2.a
4183 @endcode
4184 that is equivalent to:
4185 @code
4186 SELECT * FROM (t2 LEFT JOIN (t3,t4)ON t2.a=t4.a AND t3.a=t4.a), t1
4187 WHERE t1.a=t2.a
4188 @endcode
4189 Thus, applying equalities from the where condition we basically
4190 can get more freedom in performing join operations.
4191 Although we don't use this property now, it probably makes sense to use
4192 it in the future.
4193
4194 @param thd Thread handler
4195 @param cond condition to build the multiple equalities for
4196 @param[out] retcond Returned condition
4197 @param inherited path to all inherited multiple equality items
4198 @param do_inherit whether or not to inherit equalities from other
4199 parts of the condition
4200 @param join_list list of join tables that the condition refers to
4201 @param[out] cond_equal_ref pointer to the structure to place built
4202 equalities in
4203
4204 @returns false if success, true if error
4205 */
4206
build_equal_items(THD * thd,Item * cond,Item ** retcond,COND_EQUAL * inherited,bool do_inherit,List<TABLE_LIST> * join_list,COND_EQUAL ** cond_equal_ref)4207 bool build_equal_items(THD *thd, Item *cond, Item **retcond,
4208 COND_EQUAL *inherited, bool do_inherit,
4209 List<TABLE_LIST> *join_list,
4210 COND_EQUAL **cond_equal_ref)
4211 {
4212 COND_EQUAL *cond_equal= 0;
4213
4214 if (cond)
4215 {
4216 if (build_equal_items_for_cond(thd, cond, &cond, inherited, do_inherit))
4217 return true;
4218 cond->update_used_tables();
4219 const enum Item::Type cond_type= cond->type();
4220 if (cond_type == Item::COND_ITEM &&
4221 down_cast<Item_cond *>(cond)->functype() == Item_func::COND_AND_FUNC)
4222 cond_equal= &down_cast<Item_cond_and *>(cond)->cond_equal;
4223 else if (cond_type == Item::FUNC_ITEM &&
4224 down_cast<Item_func *>(cond)->functype() == Item_func::MULT_EQUAL_FUNC)
4225 {
4226 cond_equal= new COND_EQUAL;
4227 if (cond_equal == NULL)
4228 return true;
4229 cond_equal->current_level.push_back(down_cast<Item_equal *>(cond));
4230 }
4231 }
4232 if (cond_equal)
4233 {
4234 cond_equal->upper_levels= inherited;
4235 inherited= cond_equal;
4236 }
4237 *cond_equal_ref= cond_equal;
4238
4239 if (join_list)
4240 {
4241 TABLE_LIST *table;
4242 List_iterator<TABLE_LIST> li(*join_list);
4243
4244 while ((table= li++))
4245 {
4246 if (table->join_cond_optim())
4247 {
4248 List<TABLE_LIST> *nested_join_list= table->nested_join ?
4249 &table->nested_join->join_list : NULL;
4250 Item *join_cond;
4251 if (build_equal_items(thd, table->join_cond_optim(), &join_cond,
4252 inherited, do_inherit,
4253 nested_join_list, &table->cond_equal))
4254 return true;
4255 table->set_join_cond_optim(join_cond);
4256 }
4257 }
4258 }
4259
4260 *retcond= cond;
4261 return false;
4262 }
4263
4264
4265 /**
4266 Compare field items by table order in the execution plan.
4267
4268 field1 considered as better than field2 if the table containing
4269 field1 is accessed earlier than the table containing field2.
4270 The function finds out what of two fields is better according
4271 this criteria.
4272
4273 @param field1 first field item to compare
4274 @param field2 second field item to compare
4275 @param table_join_idx index to tables determining table order
4276
4277 @retval
4278 -1 if field1 is better than field2
4279 @retval
4280 1 if field2 is better than field1
4281 @retval
4282 0 otherwise
4283 */
4284
compare_fields_by_table_order(Item_field * field1,Item_field * field2,void * table_join_idx)4285 static int compare_fields_by_table_order(Item_field *field1,
4286 Item_field *field2,
4287 void *table_join_idx)
4288 {
4289 int cmp= 0;
4290 bool outer_ref= 0;
4291 if (field1->used_tables() & OUTER_REF_TABLE_BIT)
4292 {
4293 outer_ref= 1;
4294 cmp= -1;
4295 }
4296 if (field2->used_tables() & OUTER_REF_TABLE_BIT)
4297 {
4298 outer_ref= 1;
4299 cmp++;
4300 }
4301 if (outer_ref)
4302 return cmp;
4303 JOIN_TAB **idx= (JOIN_TAB **) table_join_idx;
4304
4305 /*
4306 idx is NULL if this function was not called from JOIN::optimize()
4307 but from e.g. mysql_delete() or mysql_update(). In these cases
4308 there is only one table and both fields belong to it. Example
4309 condition where this is the case: t1.fld1=t1.fld2
4310 */
4311 if (!idx)
4312 return 0;
4313
4314 // Locate JOIN_TABs thanks to table_join_idx, then compare their index.
4315 cmp= idx[field1->table_ref->tableno()]->idx() -
4316 idx[field2->table_ref->tableno()]->idx();
4317 return cmp < 0 ? -1 : (cmp ? 1 : 0);
4318 }
4319
4320
4321 /**
4322 Generate minimal set of simple equalities equivalent to a multiple equality.
4323
4324 The function retrieves the fields of the multiple equality item
4325 item_equal and for each field f:
4326 - if item_equal contains const it generates the equality f=const_item;
4327 - otherwise, if f is not the first field, generates the equality
4328 f=item_equal->get_first().
4329 All generated equality are added to the cond conjunction.
4330
4331 @param cond condition to add the generated equality to
4332 @param upper_levels structure to access multiple equality of upper levels
4333 @param item_equal multiple equality to generate simple equality from
4334
4335 @note
4336 Before generating an equality function checks that it has not
4337 been generated for multiple equalities of the upper levels.
4338 E.g. for the following where condition
4339 WHERE a=5 AND ((a=b AND b=c) OR c>4)
4340 the upper level AND condition will contain =(5,a),
4341 while the lower level AND condition will contain =(5,a,b,c).
4342 When splitting =(5,a,b,c) into a separate equality predicates
4343 we should omit 5=a, as we have it already in the upper level.
4344 The following where condition gives us a more complicated case:
4345 WHERE t1.a=t2.b AND t3.c=t4.d AND (t2.b=t3.c OR t4.e>5 ...) AND ...
4346 Given the tables are accessed in the order t1->t2->t3->t4 for
4347 the selected query execution plan the lower level multiple
4348 equality =(t1.a,t2.b,t3.c,t4.d) formally should be converted to
4349 t1.a=t2.b AND t1.a=t3.c AND t1.a=t4.d. But t1.a=t2.a will be
4350 generated for the upper level. Also t3.c=t4.d will be generated there.
4351 So only t1.a=t3.c should be left in the lower level.
4352 If cond is equal to 0, then not more then one equality is generated
4353 and a pointer to it is returned as the result of the function.
4354
4355 @return
4356 - The condition with generated simple equalities or
4357 a pointer to the simple generated equality, if success.
4358 - 0, otherwise.
4359 */
4360
eliminate_item_equal(Item * cond,COND_EQUAL * upper_levels,Item_equal * item_equal)4361 static Item *eliminate_item_equal(Item *cond, COND_EQUAL *upper_levels,
4362 Item_equal *item_equal)
4363 {
4364 List<Item> eq_list;
4365 Item_func_eq *eq_item= NULL;
4366 if (((Item *) item_equal)->const_item() && !item_equal->val_int())
4367 return new Item_int((longlong) 0,1);
4368 Item *const item_const= item_equal->get_const();
4369 Item_equal_iterator it(*item_equal);
4370 if (!item_const)
4371 {
4372 /*
4373 If there is a const item, match all field items with the const item,
4374 otherwise match the second and subsequent field items with the first one:
4375 */
4376 it++;
4377 }
4378 Item_field *item_field; // Field to generate equality for.
4379 while ((item_field= it++))
4380 {
4381 /*
4382 Generate an equality of the form:
4383 item_field = some previous field in item_equal's list.
4384
4385 First see if we really need to generate it:
4386 */
4387 Item_equal *const upper= item_field->find_item_equal(upper_levels);
4388 if (upper) // item_field is in this upper equality
4389 {
4390 if (item_const && upper->get_const())
4391 continue; // Const at both levels, no need to generate at current level
4392 /*
4393 If the upper-level multiple equality contains this item, there is no
4394 need to generate the equality, unless item_field belongs to a
4395 semi-join nest that is used for Materialization, and refers to tables
4396 that are outside of the materialized semi-join nest,
4397 As noted in Item_equal::get_subst_item(), subquery materialization
4398 does not have this problem.
4399 */
4400 JOIN_TAB *const tab= item_field->field->table->reginfo.join_tab;
4401
4402 if (!(tab && sj_is_materialize_strategy(tab->get_sj_strategy())))
4403 {
4404 Item_field *item_match;
4405 Item_equal_iterator li(*item_equal);
4406 while ((item_match= li++) != item_field)
4407 {
4408 if (item_match->find_item_equal(upper_levels) == upper)
4409 break; // (item_match, item_field) is also in upper level equality
4410 }
4411 if (item_match != item_field)
4412 continue;
4413 }
4414 } // ... if (upper).
4415
4416 /*
4417 item_field should be compared with the head of the multiple equality
4418 list.
4419 item_field may refer to a table that is within a semijoin materialization
4420 nest. In that case, the order of the join_tab entries may look like:
4421
4422 ot1 ot2 <subquery> ot5 SJM(it3 it4)
4423
4424 If we have a multiple equality
4425
4426 (ot1.c1, ot2.c2, <subquery>.c it3.c3, it4.c4, ot5.c5),
4427
4428 we should generate the following equalities:
4429 1. ot1.c1 = ot2.c2
4430 2. ot1.c1 = <subquery>.c
4431 3. it3.c3 = it4.c4
4432 4. ot1.c1 = ot5.c5
4433
4434 Equalities 1) and 4) are regular equalities between two outer tables.
4435 Equality 2) is an equality that matches the outer query with a
4436 materialized temporary table. It is either performed as a lookup
4437 into the materialized table (SJM-lookup), or as a condition on the
4438 outer table (SJM-scan).
4439 Equality 3) is evaluated during semijoin materialization.
4440
4441 If there is a const item, match against this one.
4442 Otherwise, match against the first field item in the multiple equality,
4443 unless the item is within a materialized semijoin nest, in case it will
4444 be matched against the first item within the SJM nest.
4445 @see JOIN::set_prefix_tables()
4446 @see Item_equal::get_subst_item()
4447 */
4448
4449 Item *const head=
4450 item_const ? item_const : item_equal->get_subst_item(item_field);
4451 if (head == item_field)
4452 continue;
4453
4454 // we have a pair, can generate 'item_field=head'
4455 if (eq_item)
4456 eq_list.push_back(eq_item);
4457
4458 eq_item= new Item_func_eq(item_field, head);
4459 if (!eq_item || eq_item->set_cmp_func())
4460 return NULL;
4461 eq_item->quick_fix_field();
4462 } // ... while ((item_field= it++))
4463
4464 if (!cond && !eq_list.head())
4465 {
4466 if (!eq_item)
4467 return new Item_int((longlong) 1,1);
4468 return eq_item;
4469 }
4470
4471 if (eq_item)
4472 eq_list.push_back(eq_item);
4473 if (!cond)
4474 cond= new Item_cond_and(eq_list);
4475 else
4476 {
4477 assert(cond->type() == Item::COND_ITEM);
4478 if (eq_list.elements)
4479 ((Item_cond *) cond)->add_at_head(&eq_list);
4480 }
4481
4482 cond->quick_fix_field();
4483 cond->update_used_tables();
4484
4485 return cond;
4486 }
4487
4488
4489 /**
4490 Substitute every field reference in a condition by the best equal field
4491 and eliminate all multiple equality predicates.
4492
4493 The function retrieves the cond condition and for each encountered
4494 multiple equality predicate it sorts the field references in it
4495 according to the order of tables specified by the table_join_idx
4496 parameter. Then it eliminates the multiple equality predicate it
4497 replacing it by the conjunction of simple equality predicates
4498 equating every field from the multiple equality to the first
4499 field in it, or to the constant, if there is any.
4500 After this the function retrieves all other conjuncted
4501 predicates substitute every field reference by the field reference
4502 to the first equal field or equal constant if there are any.
4503
4504 @param cond condition to process
4505 @param cond_equal multiple equalities to take into consideration
4506 @param table_join_idx index to tables determining field preference
4507
4508 @note
4509 At the first glance full sort of fields in multiple equality
4510 seems to be an overkill. Yet it's not the case due to possible
4511 new fields in multiple equality item of lower levels. We want
4512 the order in them to comply with the order of upper levels.
4513
4514 @return
4515 The transformed condition, or NULL in case of error
4516 */
4517
substitute_for_best_equal_field(Item * cond,COND_EQUAL * cond_equal,void * table_join_idx)4518 Item* substitute_for_best_equal_field(Item *cond,
4519 COND_EQUAL *cond_equal,
4520 void *table_join_idx)
4521 {
4522 Item_equal *item_equal;
4523
4524 if (cond->type() == Item::COND_ITEM)
4525 {
4526 List<Item> *cond_list= ((Item_cond*) cond)->argument_list();
4527
4528 bool and_level= ((Item_cond*) cond)->functype() ==
4529 Item_func::COND_AND_FUNC;
4530 if (and_level)
4531 {
4532 cond_equal= &((Item_cond_and *) cond)->cond_equal;
4533 cond_list->disjoin((List<Item> *) &cond_equal->current_level);
4534
4535 List_iterator_fast<Item_equal> it(cond_equal->current_level);
4536 while ((item_equal= it++))
4537 {
4538 item_equal->sort(&compare_fields_by_table_order, table_join_idx);
4539 }
4540 }
4541
4542 List_iterator<Item> li(*cond_list);
4543 Item *item;
4544 while ((item= li++))
4545 {
4546 Item *new_item= substitute_for_best_equal_field(item, cond_equal,
4547 table_join_idx);
4548 if (new_item == NULL)
4549 return NULL;
4550 /*
4551 This works OK with PS/SP re-execution as changes are made to
4552 the arguments of AND/OR items only
4553 */
4554 if (new_item != item)
4555 li.replace(new_item);
4556 }
4557
4558 if (and_level)
4559 {
4560 List_iterator_fast<Item_equal> it(cond_equal->current_level);
4561 while ((item_equal= it++))
4562 {
4563 cond= eliminate_item_equal(cond, cond_equal->upper_levels, item_equal);
4564 if (cond == NULL)
4565 return NULL;
4566 // This occurs when eliminate_item_equal() founds that cond is
4567 // always false and substitutes it with Item_int 0.
4568 // Due to this, value of item_equal will be 0, so just return it.
4569 if (cond->type() != Item::COND_ITEM)
4570 break;
4571 }
4572 }
4573 if (cond->type() == Item::COND_ITEM &&
4574 !((Item_cond*)cond)->argument_list()->elements)
4575 cond= new Item_int((int32)cond->val_bool());
4576
4577 }
4578 else if (cond->type() == Item::FUNC_ITEM &&
4579 ((Item_cond*) cond)->functype() == Item_func::MULT_EQUAL_FUNC)
4580 {
4581 item_equal= (Item_equal *) cond;
4582 item_equal->sort(&compare_fields_by_table_order, table_join_idx);
4583 if (cond_equal && cond_equal->current_level.head() == item_equal)
4584 cond_equal= cond_equal->upper_levels;
4585 return eliminate_item_equal(0, cond_equal, item_equal);
4586 }
4587 else
4588 cond->transform(&Item::replace_equal_field, 0);
4589 return cond;
4590 }
4591
4592
4593 /**
4594 change field = field to field = const for each found field = const in the
4595 and_level
4596
4597 @param thd Thread handler
4598 @param save_list
4599 @param and_father
4600 @param cond Condition where fields are replaced with constant values
4601 @param field The field that will be substituted
4602 @param value The substitution value
4603
4604 @returns false if success, true if error
4605 */
4606
4607 static bool
change_cond_ref_to_const(THD * thd,I_List<COND_CMP> * save_list,Item * and_father,Item * cond,Item * field,Item * value)4608 change_cond_ref_to_const(THD *thd, I_List<COND_CMP> *save_list,
4609 Item *and_father, Item *cond,
4610 Item *field, Item *value)
4611 {
4612 if (cond->type() == Item::COND_ITEM)
4613 {
4614 Item_cond *const item_cond= down_cast<Item_cond *>(cond);
4615 bool and_level= item_cond->functype() == Item_func::COND_AND_FUNC;
4616 List_iterator<Item> li(*item_cond->argument_list());
4617 Item *item;
4618 while ((item=li++))
4619 {
4620 if (change_cond_ref_to_const(thd, save_list,
4621 and_level ? cond : item,
4622 item, field, value))
4623 return true;
4624 }
4625 return false;
4626 }
4627 if (cond->eq_cmp_result() == Item::COND_OK)
4628 return false; // Not a boolean function
4629
4630 Item_bool_func2 *func= down_cast<Item_bool_func2 *>(cond);
4631 Item **args= func->arguments();
4632 Item *left_item= args[0];
4633 Item *right_item= args[1];
4634 Item_func::Functype functype= func->functype();
4635
4636 if (right_item->eq(field,0) && left_item != value &&
4637 right_item->cmp_context == field->cmp_context &&
4638 (left_item->result_type() != STRING_RESULT ||
4639 value->result_type() != STRING_RESULT ||
4640 left_item->collation.collation == value->collation.collation))
4641 {
4642 Item *const clone= value->clone_item();
4643 if (thd->is_error())
4644 return true;
4645
4646 if (clone == NULL)
4647 return false;
4648
4649 clone->collation.set(right_item->collation);
4650 thd->change_item_tree(args + 1, clone);
4651 func->update_used_tables();
4652 if ((functype == Item_func::EQ_FUNC ||
4653 functype == Item_func::EQUAL_FUNC) &&
4654 and_father != cond && !left_item->const_item())
4655 {
4656 cond->marker=1;
4657 COND_CMP *const cond_cmp= new COND_CMP(and_father,func);
4658 if (cond_cmp == NULL)
4659 return true;
4660
4661 save_list->push_back(cond_cmp);
4662
4663 }
4664 if (func->set_cmp_func())
4665 return true;
4666 }
4667 else if (left_item->eq(field,0) && right_item != value &&
4668 left_item->cmp_context == field->cmp_context &&
4669 (right_item->result_type() != STRING_RESULT ||
4670 value->result_type() != STRING_RESULT ||
4671 right_item->collation.collation == value->collation.collation))
4672 {
4673 Item *const clone= value->clone_item();
4674 if (thd->is_error())
4675 return true;
4676
4677 if (clone == NULL)
4678 return false;
4679
4680 clone->collation.set(left_item->collation);
4681 thd->change_item_tree(args, clone);
4682 value= clone;
4683 func->update_used_tables();
4684 if ((functype == Item_func::EQ_FUNC ||
4685 functype == Item_func::EQUAL_FUNC) &&
4686 and_father != cond && !right_item->const_item())
4687 {
4688 args[0]= args[1]; // For easy check
4689 thd->change_item_tree(args + 1, value);
4690 cond->marker=1;
4691 COND_CMP *const cond_cmp= new COND_CMP(and_father,func);
4692 if (cond_cmp == NULL)
4693 return true;
4694
4695 save_list->push_back(cond_cmp);
4696 }
4697 if (func->set_cmp_func())
4698 return true;
4699 }
4700 return false;
4701 }
4702
4703 /**
4704 Propagate constant values in a condition
4705
4706 @param thd Thread handler
4707 @param save_list
4708 @param and_father
4709 @param cond Condition for which constant values are propagated
4710
4711 @returns false if success, true if error
4712 */
4713 static bool
propagate_cond_constants(THD * thd,I_List<COND_CMP> * save_list,Item * and_father,Item * cond)4714 propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list,
4715 Item *and_father, Item *cond)
4716 {
4717 if (cond->type() == Item::COND_ITEM)
4718 {
4719 Item_cond *const item_cond= down_cast<Item_cond *>(cond);
4720 bool and_level= item_cond->functype() == Item_func::COND_AND_FUNC;
4721 List_iterator_fast<Item> li(*item_cond->argument_list());
4722 Item *item;
4723 I_List<COND_CMP> save;
4724 while ((item=li++))
4725 {
4726 if (propagate_cond_constants(thd, &save, and_level ? cond : item, item))
4727 return true;
4728 }
4729 if (and_level)
4730 { // Handle other found items
4731 I_List_iterator<COND_CMP> cond_itr(save);
4732 COND_CMP *cond_cmp;
4733 while ((cond_cmp= cond_itr++))
4734 {
4735 Item **args= cond_cmp->cmp_func->arguments();
4736 if (!args[0]->const_item() &&
4737 change_cond_ref_to_const(thd, &save, cond_cmp->and_level,
4738 cond_cmp->and_level, args[0], args[1]))
4739 return true;
4740 }
4741 }
4742 }
4743 else if (and_father != cond && !cond->marker) // In a AND group
4744 {
4745 Item_func *func;
4746 if (cond->type() == Item::FUNC_ITEM &&
4747 (func= down_cast<Item_func *>(cond)) &&
4748 (func->functype() == Item_func::EQ_FUNC ||
4749 func->functype() == Item_func::EQUAL_FUNC))
4750 {
4751 Item **args= func->arguments();
4752 bool left_const= args[0]->const_item();
4753 bool right_const= args[1]->const_item();
4754 if (!(left_const && right_const) &&
4755 args[0]->result_type() == args[1]->result_type())
4756 {
4757 if (right_const)
4758 {
4759 if (resolve_const_item(thd, &args[1], args[0]))
4760 return true;
4761 func->update_used_tables();
4762 if (change_cond_ref_to_const(thd, save_list, and_father, and_father,
4763 args[0], args[1]))
4764 return true;
4765 }
4766 else if (left_const)
4767 {
4768 if (resolve_const_item(thd, &args[0], args[1]))
4769 return true;
4770 func->update_used_tables();
4771 if (change_cond_ref_to_const(thd, save_list, and_father, and_father,
4772 args[1], args[0]))
4773 return true;
4774 }
4775 }
4776 }
4777 }
4778
4779 return false;
4780 }
4781
4782
4783 /**
4784 Assign each nested join structure a bit in nested_join_map.
4785
4786 @param join_list List of tables
4787 @param first_unused Number of first unused bit in nested_join_map before the
4788 call
4789
4790 @note
4791 This function is called after simplify_joins(), when there are no
4792 redundant nested joins.
4793 We cannot have more nested joins in a query block than there are tables,
4794 so as long as the number of bits in nested_join_map is not less than the
4795 maximum number of tables in a query block, nested_join_map can never
4796 overflow.
4797
4798 @return
4799 First unused bit in nested_join_map after the call.
4800 */
4801
build_bitmap_for_nested_joins(List<TABLE_LIST> * join_list,uint first_unused)4802 uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list,
4803 uint first_unused)
4804 {
4805 List_iterator<TABLE_LIST> li(*join_list);
4806 TABLE_LIST *table;
4807 DBUG_ENTER("build_bitmap_for_nested_joins");
4808 while ((table= li++))
4809 {
4810 NESTED_JOIN *nested_join;
4811 if ((nested_join= table->nested_join))
4812 {
4813 // We should have either a join condition or a semi-join condition
4814 assert((table->join_cond() == NULL) == (table->sj_cond() != NULL));
4815
4816 nested_join->nj_map= 0;
4817 nested_join->nj_total= 0;
4818 /*
4819 We only record nested join information for outer join nests.
4820 Tables belonging in semi-join nests are recorded in the
4821 embedding outer join nest, if one exists.
4822 */
4823 if (table->join_cond())
4824 {
4825 assert(first_unused < sizeof(nested_join_map)*8);
4826 nested_join->nj_map= (nested_join_map) 1 << first_unused++;
4827 nested_join->nj_total= nested_join->join_list.elements;
4828 }
4829 else if (table->sj_cond())
4830 {
4831 NESTED_JOIN *const outer_nest=
4832 table->embedding ? table->embedding->nested_join : NULL;
4833 /*
4834 The semi-join nest has already been counted into the table count
4835 for the outer join nest as one table, so subtract 1 from the
4836 table count.
4837 */
4838 if (outer_nest)
4839 outer_nest->nj_total+= (nested_join->join_list.elements - 1);
4840 }
4841 else
4842 assert(false);
4843
4844 first_unused= build_bitmap_for_nested_joins(&nested_join->join_list,
4845 first_unused);
4846 }
4847 }
4848 DBUG_RETURN(first_unused);
4849 }
4850
4851
4852 /** Update the dependency map for the tables. */
4853
update_depend_map()4854 void JOIN::update_depend_map()
4855 {
4856 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
4857 for (uint tableno = 0; tableno < tables; tableno++)
4858 {
4859 JOIN_TAB *const tab= best_ref[tableno];
4860 TABLE_REF *const ref= &tab->ref();
4861 table_map depend_map= 0;
4862 Item **item= ref->items;
4863 for (uint i = 0; i < ref->key_parts; i++, item++)
4864 depend_map|= (*item)->used_tables();
4865 depend_map&= ~PSEUDO_TABLE_BITS;
4866 ref->depend_map= depend_map;
4867 for (JOIN_TAB **tab2= map2table; depend_map; tab2++, depend_map >>= 1)
4868 {
4869 if (depend_map & 1)
4870 ref->depend_map|= (*tab2)->ref().depend_map;
4871 }
4872 }
4873 }
4874
4875
4876 /** Update the dependency map for the sort order. */
4877
update_depend_map(ORDER * order)4878 void JOIN::update_depend_map(ORDER *order)
4879 {
4880 for (; order ; order=order->next)
4881 {
4882 table_map depend_map;
4883 order->item[0]->update_used_tables();
4884 order->depend_map= depend_map=
4885 order->item[0]->used_tables() & ~PARAM_TABLE_BIT;
4886 order->used= 0;
4887 // Not item_sum(), RAND() and no reference to table outside of sub select
4888 if (!(order->depend_map & (OUTER_REF_TABLE_BIT | RAND_TABLE_BIT))
4889 && !order->item[0]->with_sum_func)
4890 {
4891 for (JOIN_TAB **tab= map2table; depend_map; tab++, depend_map >>= 1)
4892 {
4893 if (depend_map & 1)
4894 order->depend_map|=(*tab)->ref().depend_map;
4895 }
4896 }
4897 }
4898 }
4899
4900
4901 /**
4902 Update equalities and keyuse references after semi-join materialization
4903 strategy is chosen.
4904
4905 @details
4906 For each multiple equality that contains a field that is selected
4907 from a subquery, and that subquery is executed using a semi-join
4908 materialization strategy, add the corresponding column in the materialized
4909 temporary table to the equality.
4910 For each injected semi-join equality that is not converted to
4911 multiple equality, replace the reference to the expression selected
4912 from the subquery with the corresponding column in the temporary table.
4913
4914 This is needed to properly reflect the equalities that involve injected
4915 semi-join equalities when materialization strategy is chosen.
4916 @see eliminate_item_equal() for how these equalities are used to generate
4917 correct equality predicates.
4918
4919 The MaterializeScan semi-join strategy requires some additional processing:
4920 All primary tables after the materialized temporary table must be inspected
4921 for keyuse objects that point to expressions from the subquery tables.
4922 These references must be replaced with references to corresponding columns
4923 in the materialized temporary table instead. Those primary tables using
4924 ref access will thus be made to depend on the materialized temporary table
4925 instead of the subquery tables.
4926
4927 Only the injected semi-join equalities need this treatment, other predicates
4928 will be handled correctly by the regular item substitution process.
4929
4930 @return False if success, true if error
4931 */
4932
update_equalities_for_sjm()4933 bool JOIN::update_equalities_for_sjm()
4934 {
4935 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
4936 List_iterator<Semijoin_mat_exec> it(sjm_exec_list);
4937 Semijoin_mat_exec *sjm_exec;
4938 while ((sjm_exec= it++))
4939 {
4940 TABLE_LIST *const sj_nest= sjm_exec->sj_nest;
4941
4942 assert(!sj_nest->outer_join_nest());
4943 /*
4944 A materialized semi-join nest cannot actually be an inner part of an
4945 outer join yet, this is just a preparatory step,
4946 ie sj_nest->outer_join_nest() is always NULL here.
4947 @todo: Enable outer joining here later.
4948 */
4949 Item *cond= sj_nest->outer_join_nest() ?
4950 sj_nest->outer_join_nest()->join_cond_optim() : where_cond;
4951 if (!cond)
4952 continue;
4953
4954 uchar *dummy= NULL;
4955 cond= cond->compile(&Item::equality_substitution_analyzer, &dummy,
4956 &Item::equality_substitution_transformer,
4957 (uchar *)sj_nest);
4958 if (cond == NULL)
4959 return true;
4960
4961 cond->update_used_tables();
4962
4963 // Loop over all primary tables that follow the materialized table
4964 for (uint j= sjm_exec->mat_table_index + 1; j < primary_tables; j++)
4965 {
4966 JOIN_TAB *const tab= best_ref[j];
4967 for (Key_use *keyuse= tab->position()->key;
4968 keyuse && keyuse->table_ref == tab->table_ref &&
4969 keyuse->key == tab->position()->key->key;
4970 keyuse++)
4971 {
4972 List_iterator<Item> it(sj_nest->nested_join->sj_inner_exprs);
4973 Item *old;
4974 uint fieldno= 0;
4975 while ((old= it++))
4976 {
4977 if (old->real_item()->eq(keyuse->val->real_item(), false))
4978 {
4979 /*
4980 Replace the expression selected from the subquery with the
4981 corresponding column of the materialized temporary table.
4982 */
4983 keyuse->val= sj_nest->nested_join->sjm.mat_fields[fieldno];
4984 keyuse->used_tables= keyuse->val->used_tables();
4985 break;
4986 }
4987 fieldno++;
4988 }
4989 }
4990 }
4991 }
4992
4993 return false;
4994 }
4995
4996
4997 /**
4998 Assign set of available (prefix) tables to all tables in query block.
4999 Also set added tables, ie the tables added in each JOIN_TAB compared to the
5000 previous JOIN_TAB.
5001 This function must be called for every query block after the table order
5002 has been determined.
5003 */
5004
set_prefix_tables()5005 void JOIN::set_prefix_tables()
5006 {
5007 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
5008 assert(!plan_is_const());
5009 /*
5010 The const tables are available together with the first non-const table in
5011 the join order.
5012 */
5013 table_map const initial_tables_map= const_table_map |
5014 (allow_outer_refs ? OUTER_REF_TABLE_BIT : 0);
5015
5016 table_map current_tables_map= initial_tables_map;
5017 table_map prev_tables_map= (table_map) 0;
5018 table_map saved_tables_map= (table_map) 0;
5019
5020 JOIN_TAB *last_non_sjm_tab= NULL; // Track the last non-sjm table
5021
5022 for (uint i= const_tables; i < tables; i++)
5023 {
5024 JOIN_TAB *const tab= best_ref[i];
5025 if (!tab->table())
5026 continue;
5027 /*
5028 Tables that are within SJ-Materialization nests cannot have their
5029 conditions referring to preceding non-const tables.
5030 - If we're looking at the first SJM table, reset current_tables_map
5031 to refer to only allowed tables
5032 @see Item_equal::get_subst_item()
5033 @see eliminate_item_equal()
5034 */
5035 if (sj_is_materialize_strategy(tab->get_sj_strategy()))
5036 {
5037 const table_map sjm_inner_tables= tab->emb_sj_nest->sj_inner_tables;
5038 if (!(sjm_inner_tables & current_tables_map))
5039 {
5040 saved_tables_map= current_tables_map;
5041 current_tables_map= initial_tables_map;
5042 prev_tables_map= (table_map) 0;
5043 }
5044
5045 current_tables_map|= tab->table_ref->map();
5046 tab->set_prefix_tables(current_tables_map, prev_tables_map);
5047 prev_tables_map= current_tables_map;
5048
5049 if (!(sjm_inner_tables & ~current_tables_map))
5050 {
5051 /*
5052 At the end of a semi-join materialization nest,
5053 add non-deterministic expressions to the last table of the nest:
5054 */
5055 tab->add_prefix_tables(RAND_TABLE_BIT);
5056
5057 // Restore the previous map:
5058 current_tables_map= saved_tables_map;
5059 prev_tables_map= last_non_sjm_tab ?
5060 last_non_sjm_tab->prefix_tables() : (table_map) 0;
5061 }
5062 }
5063 else
5064 {
5065 last_non_sjm_tab= tab;
5066 current_tables_map|= tab->table_ref->map();
5067 tab->set_prefix_tables(current_tables_map, prev_tables_map);
5068 prev_tables_map= current_tables_map;
5069 }
5070 }
5071 /*
5072 Non-deterministic expressions must be added to the last table's condition.
5073 It solves problem with queries like SELECT * FROM t1 WHERE rand() > 0.5
5074 */
5075 if (last_non_sjm_tab != NULL)
5076 last_non_sjm_tab->add_prefix_tables(RAND_TABLE_BIT);
5077 }
5078
5079
5080 /**
5081 Calculate best possible join order and initialize the join structure.
5082
5083 @return true if success, false if error.
5084
5085 The JOIN object is populated with statistics about the query,
5086 and a plan with table order and access method selection is made.
5087
5088 The list of tables to be optimized is taken from select_lex->leaf_tables.
5089 JOIN::where_cond is also used in the optimization.
5090 As a side-effect, JOIN::keyuse_array is populated with key_use information.
5091
5092 Here is an overview of the logic of this function:
5093
5094 - Initialize JOIN data structures and setup basic dependencies between tables.
5095
5096 - Update dependencies based on join information.
5097
5098 - Make key descriptions (update_ref_and_keys()).
5099
5100 - Pull out semi-join tables based on table dependencies.
5101
5102 - Extract tables with zero or one rows as const tables.
5103
5104 - Read contents of const tables, substitute columns from these tables with
5105 actual data. Also keep track of empty tables vs. one-row tables.
5106
5107 - After const table extraction based on row count, more tables may
5108 have become functionally dependent. Extract these as const tables.
5109
5110 - Add new sargable predicates based on retrieved const values.
5111
5112 - Calculate number of rows to be retrieved from each table.
5113
5114 - Calculate cost of potential semi-join materializations.
5115
5116 - Calculate best possible join order based on available statistics.
5117
5118 - Fill in remaining information for the generated join order.
5119 */
5120
make_join_plan()5121 bool JOIN::make_join_plan()
5122 {
5123 DBUG_ENTER("JOIN::make_join_plan");
5124
5125 SARGABLE_PARAM *sargables= NULL;
5126
5127 Opt_trace_context * const trace= &thd->opt_trace;
5128
5129 if (init_planner_arrays()) // Create and initialize the arrays
5130 DBUG_RETURN(true);
5131
5132 // Outer join dependencies were initialized above, now complete the analysis.
5133 if (select_lex->outer_join)
5134 propagate_dependencies();
5135
5136 if (unlikely(trace->is_started()))
5137 trace_table_dependencies(trace, join_tab, primary_tables);
5138
5139 // Build the key access information, which is the basis for ref access.
5140 if (where_cond || select_lex->outer_join)
5141 {
5142 if (update_ref_and_keys(thd, &keyuse_array, join_tab, tables, where_cond,
5143 cond_equal, ~select_lex->outer_join, select_lex,
5144 &sargables))
5145 DBUG_RETURN(true);
5146 }
5147
5148 /*
5149 Pull out semi-join tables based on dependencies. Dependencies are valid
5150 throughout the lifetime of a query, so this operation can be performed
5151 on the first optimization only.
5152 */
5153 if (!select_lex->sj_pullout_done && select_lex->sj_nests.elements &&
5154 pull_out_semijoin_tables(this))
5155 DBUG_RETURN(true);
5156
5157 select_lex->sj_pullout_done= true;
5158 const uint sj_nests= select_lex->sj_nests.elements; // Changed by pull-out
5159
5160 if (!(select_lex->active_options() & OPTION_NO_CONST_TABLES))
5161 {
5162 // Detect tables that are const (0 or 1 row) and read their contents.
5163 if (extract_const_tables())
5164 DBUG_RETURN(true);
5165
5166 // Detect tables that are functionally dependent on const values.
5167 if (extract_func_dependent_tables())
5168 DBUG_RETURN(true);
5169 }
5170 // Possibly able to create more sargable predicates from const rows.
5171 if (const_tables && sargables)
5172 update_sargable_from_const(sargables);
5173
5174 // Make a first estimate of the fanout for each table in the query block.
5175 if (estimate_rowcount())
5176 DBUG_RETURN(true);
5177
5178 if (sj_nests)
5179 {
5180 set_semijoin_embedding();
5181 select_lex->update_semijoin_strategies(thd);
5182 }
5183
5184 if (!plan_is_const())
5185 optimize_keyuse();
5186
5187 allow_outer_refs= true;
5188
5189 if (sj_nests && optimize_semijoin_nests_for_materialization(this))
5190 DBUG_RETURN(true);
5191
5192 // Choose the table order based on analysis done so far.
5193 if (Optimize_table_order(thd, this, NULL).choose_table_order())
5194 DBUG_RETURN(true);
5195
5196 DBUG_EXECUTE_IF("bug13820776_1", thd->killed= THD::KILL_QUERY;);
5197 if (thd->killed || thd->is_error())
5198 DBUG_RETURN(true);
5199
5200 // If this is a subquery, decide between In-to-exists and materialization
5201 if (unit->item && decide_subquery_strategy())
5202 DBUG_RETURN(true);
5203
5204 refine_best_rowcount();
5205
5206 if (!(thd->variables.option_bits & OPTION_BIG_SELECTS) &&
5207 best_read > (double) thd->variables.max_join_size &&
5208 !thd->lex->is_explain())
5209 { /* purecov: inspected */
5210 my_message(ER_TOO_BIG_SELECT, ER(ER_TOO_BIG_SELECT), MYF(0));
5211 error= -1;
5212 DBUG_RETURN(1);
5213 }
5214
5215 positions= NULL; // But keep best_positions for get_best_combination
5216
5217 /*
5218 Store the cost of this query into a user variable
5219 Don't update m_current_query_cost for statements that are not "flat joins" :
5220 i.e. they have subqueries, unions or call stored procedures.
5221 TODO: calculate a correct cost for a query with subqueries and UNIONs.
5222 */
5223 if (thd->lex->is_single_level_stmt())
5224 thd->m_current_query_cost= best_read;
5225
5226 // Generate an execution plan from the found optimal join order.
5227 if (get_best_combination())
5228 DBUG_RETURN(true);
5229
5230 // Cleanup after update_ref_and_keys has added keys for derived tables.
5231 if (select_lex->materialized_derived_table_count)
5232 drop_unused_derived_keys();
5233
5234 // No need for this struct after new JOIN_TAB array is set up.
5235 best_positions= NULL;
5236
5237 // Some called function may still set error status unnoticed
5238 if (thd->is_error())
5239 DBUG_RETURN(true);
5240
5241 // There is at least one empty const table
5242 if (const_table_map != found_const_table_map)
5243 zero_result_cause= "no matching row in const table";
5244
5245 DBUG_RETURN(false);
5246 }
5247
5248
5249 /**
5250 Initialize scratch arrays for the join order optimization
5251
5252 @returns false if success, true if error
5253
5254 @note If something fails during initialization, JOIN::cleanup()
5255 will free anything that has been partially allocated and set up.
5256 Arrays are created in the execution mem_root, so they will be
5257 deleted automatically when the mem_root is re-initialized.
5258 */
5259
init_planner_arrays()5260 bool JOIN::init_planner_arrays()
5261 {
5262 // Up to one extra slot per semi-join nest is needed (if materialized)
5263 const uint sj_nests= select_lex->sj_nests.elements;
5264 const uint table_count= select_lex->leaf_table_count;
5265
5266 assert(primary_tables == 0 && tables == 0);
5267
5268 if (!(join_tab= alloc_jtab_array(thd, table_count)))
5269 return true;
5270
5271 /*
5272 We add 2 cells:
5273 - because planning stage uses 0-termination so needs +1
5274 - because after get_best_combination, we don't use 0-termination but
5275 need +2, to host at most 2 tmp sort/group/distinct tables.
5276 */
5277 if (!(best_ref= (JOIN_TAB **) thd->alloc(sizeof(JOIN_TAB *) *
5278 (table_count + sj_nests + 2))))
5279 return true;
5280
5281 // sort/group tmp tables have no map
5282 if (!(map2table= (JOIN_TAB **) thd->alloc(sizeof(JOIN_TAB *) *
5283 (table_count + sj_nests))))
5284 return true;
5285
5286 if (!(positions= new (thd->mem_root) POSITION[table_count]))
5287 return true;
5288
5289 if (!(best_positions= new (thd->mem_root) POSITION[table_count+sj_nests]))
5290 return true;
5291
5292 /*
5293 Initialize data structures for tables to be joined.
5294 Initialize dependencies between tables.
5295 */
5296 JOIN_TAB **best_ref_p= best_ref;
5297 TABLE_LIST *tl= select_lex->leaf_tables;
5298
5299 for (JOIN_TAB *tab= join_tab;
5300 tl;
5301 tab++, tl= tl->next_leaf, best_ref_p++)
5302 {
5303 *best_ref_p= tab;
5304 TABLE *const table= tl->table;
5305 tab->table_ref= tl;
5306 tab->set_table(table);
5307 const int err= tl->fetch_number_of_rows();
5308
5309 // Initialize the cost model for the table
5310 table->init_cost_model(cost_model());
5311
5312 DBUG_EXECUTE_IF("bug11747970_raise_error",
5313 {
5314 if (!err)
5315 {
5316 my_error(ER_UNKNOWN_ERROR, MYF(0));
5317 return true;
5318 }
5319 });
5320
5321 if (err)
5322 {
5323 table->file->print_error(err, MYF(0));
5324 return true;
5325 }
5326 table->quick_keys.clear_all();
5327 table->possible_quick_keys.clear_all();
5328 table->reginfo.not_exists_optimize= false;
5329 memset(table->const_key_parts, 0, sizeof(key_part_map)*table->s->keys);
5330 all_table_map|= tl->map();
5331 tab->set_join(this);
5332
5333 tab->dependent= tl->dep_tables; // Initialize table dependencies
5334 if (tl->schema_table)
5335 table->file->stats.records= 2;
5336 table->quick_condition_rows= table->file->stats.records;
5337
5338 tab->init_join_cond_ref(tl);
5339
5340 if (tl->outer_join_nest())
5341 {
5342 // tab belongs to a nested join, maybe to several embedding joins
5343 tab->embedding_map= 0;
5344 for (TABLE_LIST *embedding= tl->embedding;
5345 embedding;
5346 embedding= embedding->embedding)
5347 {
5348 NESTED_JOIN *const nested_join= embedding->nested_join;
5349 tab->embedding_map|= nested_join->nj_map;
5350 tab->dependent|= embedding->dep_tables;
5351 }
5352 }
5353 else if (tab->join_cond())
5354 {
5355 // tab is the only inner table of an outer join
5356 tab->embedding_map= 0;
5357 for (TABLE_LIST *embedding= tl->embedding;
5358 embedding;
5359 embedding= embedding->embedding)
5360 tab->embedding_map|= embedding->nested_join->nj_map;
5361 }
5362 tables++; // Count number of initialized tables
5363 }
5364
5365 primary_tables= tables;
5366 *best_ref_p= NULL; // Last element of array must be NULL
5367
5368 return false;
5369 }
5370
5371
5372 /**
5373 Propagate dependencies between tables due to outer join relations.
5374
5375 @returns false if success, true if error
5376
5377 Build transitive closure for relation 'to be dependent on'.
5378 This will speed up the plan search for many cases with outer joins,
5379 as well as allow us to catch illegal cross references.
5380 Warshall's algorithm is used to build the transitive closure.
5381 As we may restart the outer loop upto 'table_count' times, the
5382 complexity of the algorithm is O((number of tables)^3).
5383 However, most of the iterations will be shortcircuited when
5384 there are no dependencies to propagate.
5385 */
5386
propagate_dependencies()5387 bool JOIN::propagate_dependencies()
5388 {
5389 for (uint i= 0; i < tables; i++)
5390 {
5391 if (!join_tab[i].dependent)
5392 continue;
5393
5394 // Add my dependencies to other tables depending on me
5395 uint j;
5396 JOIN_TAB *tab;
5397 for (j= 0, tab= join_tab; j < tables; j++, tab++)
5398 {
5399 if (tab->dependent & join_tab[i].table_ref->map())
5400 {
5401 const table_map was_dependent= tab->dependent;
5402 tab->dependent|= join_tab[i].dependent;
5403 /*
5404 If we change dependencies for a table we already have
5405 processed: Redo dependency propagation from this table.
5406 */
5407 if (i > j && tab->dependent != was_dependent)
5408 {
5409 i= j-1;
5410 break;
5411 }
5412 }
5413 }
5414 }
5415
5416 JOIN_TAB *const tab_end= join_tab + tables;
5417 for (JOIN_TAB *tab= join_tab; tab < tab_end; tab++)
5418 {
5419 /*
5420 Catch illegal cross references for outer joins.
5421 This could happen before WL#2486 was implemented in 5.0, but should no
5422 longer be possible.
5423 Thus, an assert has been added should this happen again.
5424 @todo Remove the error check below.
5425 */
5426 assert(!(tab->dependent & tab->table_ref->map()));
5427
5428 if (tab->dependent & tab->table_ref->map())
5429 {
5430 tables= 0; // Don't use join->table
5431 primary_tables= 0;
5432 my_message(ER_WRONG_OUTER_JOIN, ER(ER_WRONG_OUTER_JOIN), MYF(0));
5433 return true;
5434 }
5435
5436 tab->key_dependent= tab->dependent;
5437 }
5438
5439 return false;
5440 }
5441
5442
5443 /**
5444 Extract const tables based on row counts.
5445
5446 @returns false if success, true if error
5447
5448 This extraction must be done for each execution.
5449 Tables containing exactly zero or one rows are marked as const, but
5450 notice the additional constraints checked below.
5451 Tables that are extracted have their rows read before actual execution
5452 starts and are placed in the beginning of the join_tab array.
5453 Thus, they do not take part in join order optimization process,
5454 which can significantly reduce the optimization time.
5455 The data read from these tables can also be regarded as "constant"
5456 throughout query execution, hence the column values can be used for
5457 additional constant propagation and extraction of const tables based
5458 on eq-ref properties.
5459
5460 The tables are given the type JT_SYSTEM.
5461 */
5462
extract_const_tables()5463 bool JOIN::extract_const_tables()
5464 {
5465 enum enum_const_table_extraction
5466 {
5467 extract_no_table= 0,
5468 extract_empty_table= 1,
5469 extract_const_table= 2
5470 };
5471
5472 JOIN_TAB *const tab_end= join_tab + tables;
5473 for (JOIN_TAB *tab= join_tab; tab < tab_end; tab++)
5474 {
5475 TABLE *const table= tab->table();
5476 TABLE_LIST *const tl= tab->table_ref;
5477 enum enum_const_table_extraction extract_method= extract_const_table;
5478
5479 const bool all_partitions_pruned_away= table->all_partitions_pruned_away;
5480
5481 if (tl->outer_join_nest())
5482 {
5483 /*
5484 Table belongs to a nested join, no candidate for const table extraction.
5485 */
5486 extract_method= extract_no_table;
5487 }
5488 else if (tl->embedding && tl->embedding->sj_cond())
5489 {
5490 /*
5491 Table belongs to a semi-join.
5492 We do not currently pull out const tables from semi-join nests.
5493 */
5494 extract_method= extract_no_table;
5495 }
5496 else if (tab->join_cond())
5497 {
5498 // tab is the only inner table of an outer join, extract empty tables
5499 extract_method= extract_empty_table;
5500 }
5501 switch (extract_method)
5502 {
5503 case extract_no_table:
5504 break;
5505
5506 case extract_empty_table:
5507 // Extract tables with zero rows, but only if statistics are exact
5508 if ((table->file->stats.records == 0 ||
5509 all_partitions_pruned_away) &&
5510 (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT))
5511 mark_const_table(tab, NULL);
5512 break;
5513
5514 case extract_const_table:
5515 /*
5516 Extract tables with zero or one rows, but do not extract tables that
5517 1. are dependent upon other tables, or
5518 2. have no exact statistics, or
5519 3. are full-text searched
5520 */
5521 if ((table->s->system ||
5522 table->file->stats.records <= 1 ||
5523 all_partitions_pruned_away) &&
5524 !tab->dependent && // 1
5525 (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) && // 2
5526 !table->fulltext_searched) // 3
5527 mark_const_table(tab, NULL);
5528 break;
5529 }
5530 }
5531
5532 // Read const tables (tables matching no more than 1 rows)
5533 if (!const_tables)
5534 return false;
5535
5536 for (POSITION *p_pos= positions, *p_end= p_pos + const_tables;
5537 p_pos < p_end;
5538 p_pos++)
5539 {
5540 JOIN_TAB *const tab= p_pos->table;
5541 const int status= join_read_const_table(tab, p_pos);
5542 if (status > 0)
5543 return true;
5544 else if (status == 0)
5545 {
5546 found_const_table_map|= tab->table_ref->map();
5547 tab->table_ref->optimized_away= true;
5548 }
5549 }
5550
5551 return false;
5552 }
5553
5554 /**
5555 Extract const tables based on functional dependencies.
5556
5557 @returns false if success, true if error
5558
5559 This extraction must be done for each execution.
5560
5561 Mark as const the tables that
5562 - are functionally dependent on constant values, or
5563 - are inner tables of an outer join and contain exactly zero or one rows
5564
5565 Tables that are extracted have their rows read before actual execution
5566 starts and are placed in the beginning of the join_tab array, just as
5567 described for JOIN::extract_const_tables().
5568
5569 The tables are given the type JT_CONST.
5570 */
5571
extract_func_dependent_tables()5572 bool JOIN::extract_func_dependent_tables()
5573 {
5574 // loop until no more const tables are found
5575 bool ref_changed;
5576 table_map found_ref;
5577 do
5578 {
5579 more_const_tables_found:
5580 ref_changed = false;
5581 found_ref= 0;
5582
5583 // Loop over all tables that are not already determined to be const
5584 for (JOIN_TAB **pos= best_ref + const_tables; *pos; pos++)
5585 {
5586 JOIN_TAB *const tab= *pos;
5587 TABLE *const table= tab->table();
5588 TABLE_LIST *const tl= tab->table_ref;
5589 /*
5590 If equi-join condition by a key is null rejecting and after a
5591 substitution of a const table the key value happens to be null
5592 then we can state that there are no matches for this equi-join.
5593 */
5594 Key_use *keyuse= tab->keyuse();
5595 if (keyuse && tab->join_cond() && !tab->embedding_map)
5596 {
5597 /*
5598 When performing an outer join operation if there are no matching rows
5599 for the single row of the outer table all the inner tables are to be
5600 null complemented and thus considered as constant tables.
5601 Here we apply this consideration to the case of outer join operations
5602 with a single inner table only because the case with nested tables
5603 would require a more thorough analysis.
5604 TODO. Apply single row substitution to null complemented inner tables
5605 for nested outer join operations.
5606 */
5607 while (keyuse->table_ref == tl)
5608 {
5609 if (!(keyuse->val->used_tables() & ~const_table_map) &&
5610 keyuse->val->is_null() && keyuse->null_rejecting)
5611 {
5612 table->set_null_row();
5613 table->const_table= true;
5614 found_const_table_map|= tl->map();
5615 mark_const_table(tab, keyuse);
5616 goto more_const_tables_found;
5617 }
5618 keyuse++;
5619 }
5620 }
5621
5622 if (tab->dependent) // If dependent on some table
5623 {
5624 // All dependent tables must be const
5625 if (tab->dependent & ~const_table_map)
5626 continue;
5627 /*
5628 Mark a dependent table as constant if
5629 1. it has exactly zero or one rows (it is a system table), and
5630 2. it is not within a nested outer join, and
5631 3. it does not have an expensive outer join condition.
5632 This is because we have to determine whether an outer-joined table
5633 has a real row or a null-extended row in the optimizer phase.
5634 We have no possibility to evaluate its join condition at
5635 execution time, when it is marked as a system table.
5636 */
5637 if (table->file->stats.records <= 1L && // 1
5638 (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) && // 1
5639 !tl->outer_join_nest() && // 2
5640 !(tab->join_cond() && tab->join_cond()->is_expensive())) // 3
5641 { // system table
5642 mark_const_table(tab, NULL);
5643 const int status=
5644 join_read_const_table(tab, positions + const_tables - 1);
5645 if (status > 0)
5646 return true;
5647 else if (status == 0)
5648 found_const_table_map|= tl->map();
5649 continue;
5650 }
5651 }
5652
5653 // Check if table can be read by key or table only uses const refs
5654
5655 if ((keyuse= tab->keyuse()))
5656 {
5657 while (keyuse->table_ref == tl)
5658 {
5659 Key_use *const start_keyuse= keyuse;
5660 const uint key= keyuse->key;
5661 tab->keys().set_bit(key); // QQ: remove this ?
5662
5663 table_map refs= 0;
5664 key_map const_ref, eq_part;
5665 do
5666 {
5667 if (keyuse->val->type() != Item::NULL_ITEM && !keyuse->optimize)
5668 {
5669 if (!((~found_const_table_map) & keyuse->used_tables))
5670 const_ref.set_bit(keyuse->keypart);
5671 else
5672 refs|= keyuse->used_tables;
5673 eq_part.set_bit(keyuse->keypart);
5674 }
5675 keyuse++;
5676 } while (keyuse->table_ref == tl && keyuse->key == key);
5677
5678 /*
5679 Extract const tables with proper key dependencies.
5680 Exclude tables that
5681 1. are full-text searched, or
5682 2. are part of nested outer join, or
5683 3. are part of semi-join, or
5684 4. have an expensive outer join condition.
5685 5. are blocked by handler for const table optimize.
5686 */
5687 if (eq_part.is_prefix(table->key_info[key].user_defined_key_parts) &&
5688 !table->fulltext_searched && // 1
5689 !tl->outer_join_nest() && // 2
5690 !(tl->embedding && tl->embedding->sj_cond()) && // 3
5691 !(tab->join_cond() && tab->join_cond()->is_expensive()) &&// 4
5692 !(table->file->ha_table_flags() & HA_BLOCK_CONST_TABLE)) // 5
5693 {
5694 if (table->key_info[key].flags & HA_NOSAME)
5695 {
5696 if (const_ref == eq_part)
5697 { // Found everything for ref.
5698 ref_changed = true;
5699 mark_const_table(tab, start_keyuse);
5700 if (create_ref_for_key(this, tab, start_keyuse,
5701 found_const_table_map))
5702 return true;
5703 const int status=
5704 join_read_const_table(tab, positions + const_tables - 1);
5705 if (status > 0)
5706 return true;
5707 else if (status == 0)
5708 found_const_table_map|= tl->map();
5709 break;
5710 }
5711 else
5712 found_ref|= refs; // Table is const if all refs are const
5713 }
5714 else if (const_ref == eq_part)
5715 tab->const_keys.set_bit(key);
5716 }
5717 }
5718 }
5719 }
5720 } while ((const_table_map & found_ref) && ref_changed);
5721
5722 return false;
5723 }
5724
5725 /**
5726 Update info on indexes that can be used for search lookups as
5727 reading const tables may has added new sargable predicates.
5728 */
5729
update_sargable_from_const(SARGABLE_PARAM * sargables)5730 void JOIN::update_sargable_from_const(SARGABLE_PARAM *sargables)
5731 {
5732 for ( ; sargables->field; sargables++)
5733 {
5734 Field *const field= sargables->field;
5735 JOIN_TAB *const tab= field->table->reginfo.join_tab;
5736 key_map possible_keys= field->key_start;
5737 possible_keys.intersect(field->table->keys_in_use_for_query);
5738 bool is_const= true;
5739 for (uint j= 0; j < sargables->num_values; j++)
5740 is_const&= sargables->arg_value[j]->const_item();
5741 if (is_const)
5742 {
5743 tab->const_keys.merge(possible_keys);
5744 tab->keys().merge(possible_keys);
5745 }
5746 }
5747 }
5748
5749
5750 /**
5751 Estimate the number of matched rows for each joined table.
5752 Set up range scan for tables that have proper predicates.
5753
5754 @returns false if success, true if error
5755 */
5756
estimate_rowcount()5757 bool JOIN::estimate_rowcount()
5758 {
5759 Opt_trace_context *const trace= &thd->opt_trace;
5760 Opt_trace_object trace_wrapper(trace);
5761 Opt_trace_array trace_records(trace, "rows_estimation");
5762
5763 JOIN_TAB *const tab_end= join_tab + tables;
5764 for (JOIN_TAB *tab= join_tab; tab < tab_end; tab++)
5765 {
5766 const Cost_model_table *const cost_model= tab->table()->cost_model();
5767 Opt_trace_object trace_table(trace);
5768 trace_table.add_utf8_table(tab->table_ref);
5769 if (tab->type() == JT_SYSTEM || tab->type() == JT_CONST)
5770 {
5771 trace_table.add("rows", 1).add("cost", 1)
5772 .add_alnum("table_type", (tab->type() == JT_SYSTEM) ? "system": "const")
5773 .add("empty", tab->table()->has_null_row());
5774
5775 // Only one matching row and one block to read
5776 tab->set_records(tab->found_records= 1);
5777 tab->worst_seeks= cost_model->page_read_cost(1.0);
5778 tab->read_time= static_cast<ha_rows>(tab->worst_seeks);
5779 continue;
5780 }
5781 // Approximate number of found rows and cost to read them
5782 tab->set_records(tab->found_records= tab->table()->file->stats.records);
5783 const Cost_estimate table_scan_time= tab->table()->file->table_scan_cost();
5784 tab->read_time= static_cast<ha_rows>(table_scan_time.total_cost());
5785
5786 /*
5787 Set a max value for the cost of seek operations we can expect
5788 when using key lookup. This can't be too high as otherwise we
5789 are likely to use table scan.
5790 */
5791 tab->worst_seeks=
5792 min(cost_model->page_read_cost((double) tab->found_records / 10),
5793 (double) tab->read_time * 3);
5794 const double min_worst_seek= cost_model->page_read_cost(2.0);
5795 if (tab->worst_seeks < min_worst_seek) // Fix for small tables
5796 tab->worst_seeks= min_worst_seek;
5797
5798 /*
5799 Add to tab->const_keys those indexes for which all group fields or
5800 all select distinct fields participate in one index.
5801 */
5802 add_group_and_distinct_keys(this, tab);
5803
5804 /*
5805 Perform range analysis if there are keys it could use (1).
5806 Don't do range analysis if on the inner side of an outer join (2).
5807 Do range analysis if on the inner side of a semi-join (3).
5808 */
5809 TABLE_LIST *const tl= tab->table_ref;
5810 if (!tab->const_keys.is_clear_all() && // (1)
5811 (!tl->embedding || // (2)
5812 (tl->embedding && tl->embedding->sj_cond()))) // (3)
5813 {
5814 /*
5815 This call fills tab->quick() with the best QUICK access method
5816 possible for this table, and only if it's better than table scan.
5817 It also fills tab->needed_reg.
5818 */
5819 ha_rows records= get_quick_record_count(thd, tab, row_limit);
5820
5821 if (records == 0 && thd->is_error())
5822 return true;
5823
5824 /*
5825 Check for "impossible range", but make sure that we do not attempt
5826 to mark semi-joined tables as "const" (only semi-joined tables that
5827 are functionally dependent can be marked "const", and subsequently
5828 pulled out of their semi-join nests).
5829 */
5830 if (records == 0 &&
5831 tab->table()->reginfo.impossible_range &&
5832 (!(tl->embedding && tl->embedding->sj_cond())))
5833 {
5834 /*
5835 Impossible WHERE condition or join condition
5836 In case of join cond, mark that one empty NULL row is matched.
5837 In case of WHERE, don't set found_const_table_map to get the
5838 caller to abort with a zero row result.
5839 */
5840 mark_const_table(tab, NULL);
5841 tab->set_type(JT_CONST); // Override setting made in mark_const_table()
5842 if (tab->join_cond())
5843 {
5844 // Generate an empty row
5845 trace_table.add("returning_empty_null_row", true).
5846 add_alnum("cause", "impossible_on_condition");
5847 found_const_table_map|= tl->map();
5848 tab->table()->set_null_row(); // All fields are NULL
5849 }
5850 else
5851 {
5852 trace_table.add("rows", 0).
5853 add_alnum("cause", "impossible_where_condition");
5854 }
5855 }
5856 if (records != HA_POS_ERROR)
5857 {
5858 tab->found_records= records;
5859 tab->read_time= (ha_rows) (tab->quick() ?
5860 tab->quick()->cost_est.total_cost() : 0.0);
5861 }
5862 }
5863 else
5864 {
5865 Opt_trace_object(trace, "table_scan").
5866 add("rows", tab->found_records).
5867 add("cost", tab->read_time);
5868 }
5869 }
5870
5871 return false;
5872 }
5873
5874
5875 /**
5876 Set semi-join embedding join nest pointers.
5877
5878 Set pointer to embedding semi-join nest for all semi-joined tables.
5879 Note that this must be done for every table inside all semi-join nests,
5880 even for tables within outer join nests embedded in semi-join nests.
5881 A table can never be part of multiple semi-join nests, hence no
5882 ambiguities can ever occur.
5883 Note also that the pointer is not set for TABLE_LIST objects that
5884 are outer join nests within semi-join nests.
5885 */
5886
set_semijoin_embedding()5887 void JOIN::set_semijoin_embedding()
5888 {
5889 assert(!select_lex->sj_nests.is_empty());
5890
5891 JOIN_TAB *const tab_end= join_tab + primary_tables;
5892
5893 for (JOIN_TAB *tab= join_tab; tab < tab_end; tab++)
5894 {
5895 for (TABLE_LIST *tl= tab->table_ref; tl->embedding; tl= tl->embedding)
5896 {
5897 if (tl->embedding->sj_cond())
5898 {
5899 tab->emb_sj_nest= tl->embedding;
5900 break;
5901 }
5902 }
5903 }
5904 }
5905
5906
5907 /**
5908 @brief Check if semijoin's compared types allow materialization.
5909
5910 @param[inout] sj_nest Semi-join nest containing information about correlated
5911 expressions. Set nested_join->sjm.scan_allowed to TRUE if
5912 MaterializeScan strategy allowed. Set nested_join->sjm.lookup_allowed
5913 to TRUE if MaterializeLookup strategy allowed
5914
5915 @details
5916 This is a temporary fix for BUG#36752.
5917
5918 There are two subquery materialization strategies for semijoin:
5919
5920 1. Materialize and do index lookups in the materialized table. See
5921 BUG#36752 for description of restrictions we need to put on the
5922 compared expressions.
5923
5924 In addition, since indexes are not supported for BLOB columns,
5925 this strategy can not be used if any of the columns in the
5926 materialized table will be BLOB/GEOMETRY columns. (Note that
5927 also columns for non-BLOB values that may be greater in size
5928 than CONVERT_IF_BIGGER_TO_BLOB, will be represented as BLOB
5929 columns.)
5930
5931 2. Materialize and then do a full scan of the materialized table.
5932 The same criteria as for MaterializeLookup are applied, except that
5933 BLOB/GEOMETRY columns are allowed.
5934 */
5935
5936 static
semijoin_types_allow_materialization(TABLE_LIST * sj_nest)5937 void semijoin_types_allow_materialization(TABLE_LIST *sj_nest)
5938 {
5939 DBUG_ENTER("semijoin_types_allow_materialization");
5940
5941 assert(sj_nest->nested_join->sj_outer_exprs.elements ==
5942 sj_nest->nested_join->sj_inner_exprs.elements);
5943
5944 if (sj_nest->nested_join->sj_outer_exprs.elements > MAX_REF_PARTS)
5945 {
5946 sj_nest->nested_join->sjm.scan_allowed= false;
5947 sj_nest->nested_join->sjm.lookup_allowed= false;
5948 DBUG_VOID_RETURN;
5949 }
5950
5951 List_iterator<Item> it1(sj_nest->nested_join->sj_outer_exprs);
5952 List_iterator<Item> it2(sj_nest->nested_join->sj_inner_exprs);
5953
5954 sj_nest->nested_join->sjm.scan_allowed= true;
5955 sj_nest->nested_join->sjm.lookup_allowed= true;
5956
5957 bool blobs_involved= false;
5958 Item *outer, *inner;
5959 uint total_lookup_index_length= 0;
5960 uint max_key_length;
5961 uint max_key_part_length;
5962 /*
5963 Maximum lengths for keys and key parts that are supported by
5964 the temporary table storage engine(s).
5965 */
5966 get_max_key_and_part_length(&max_key_length,
5967 &max_key_part_length);
5968 while (outer= it1++, inner= it2++)
5969 {
5970 assert(outer->real_item() && inner->real_item());
5971 if (!types_allow_materialization(outer, inner))
5972 {
5973 sj_nest->nested_join->sjm.scan_allowed= false;
5974 sj_nest->nested_join->sjm.lookup_allowed= false;
5975 DBUG_VOID_RETURN;
5976 }
5977 blobs_involved|= inner->is_blob_field();
5978
5979 // Calculate the index length of materialized table
5980 const uint lookup_index_length= get_key_length_tmp_table(inner);
5981 if (lookup_index_length > max_key_part_length)
5982 sj_nest->nested_join->sjm.lookup_allowed= false;
5983 total_lookup_index_length+= lookup_index_length ;
5984 }
5985 if (total_lookup_index_length > max_key_length)
5986 sj_nest->nested_join->sjm.lookup_allowed= false;
5987
5988 if (blobs_involved)
5989 sj_nest->nested_join->sjm.lookup_allowed= false;
5990
5991 if (sj_nest->embedding)
5992 {
5993 assert(sj_nest->embedding->join_cond_optim());
5994 /*
5995 There are two issues that prevent materialization strategy from being
5996 used when a semi-join nest is on the inner side of an outer join:
5997 1. If the semi-join contains dependencies to outer tables,
5998 materialize-scan strategy cannot be used.
5999 2. Make sure that executor is able to evaluate triggered conditions
6000 for semi-join materialized tables. It should be correct, but needs
6001 verification.
6002 TODO: Remove this limitation!
6003 Handle this by disabling materialization strategies:
6004 */
6005 sj_nest->nested_join->sjm.scan_allowed= false;
6006 sj_nest->nested_join->sjm.lookup_allowed= false;
6007 DBUG_VOID_RETURN;
6008 }
6009
6010 DBUG_PRINT("info",("semijoin_types_allow_materialization: ok, allowed"));
6011
6012 DBUG_VOID_RETURN;
6013 }
6014
6015
6016 /*****************************************************************************
6017 Create JOIN_TABS, make a guess about the table types,
6018 Approximate how many records will be used in each table
6019 *****************************************************************************/
6020
6021 /**
6022 Returns estimated number of rows that could be fetched by given
6023 access method.
6024
6025 The function calls the range optimizer to estimate the cost of the
6026 cheapest QUICK_* index access method to scan one or several of the
6027 'keys' using the conditions 'select->cond'. The range optimizer
6028 compares several different types of 'quick select' methods (range
6029 scan, index merge, loose index scan) and selects the cheapest one.
6030
6031 If the best index access method is cheaper than a table- and an index
6032 scan, then the range optimizer also constructs the corresponding
6033 QUICK_* object and assigns it to select->quick. In most cases this
6034 is the QUICK_* object used at later (optimization and execution)
6035 phases.
6036
6037 @param thd Session that runs the query.
6038 @param tab JOIN_TAB of source table.
6039 @param limit maximum number of rows to select.
6040
6041 @note
6042 In case of valid range, a QUICK_SELECT_I object will be constructed and
6043 saved in select->quick.
6044
6045 @return Estimated number of result rows selected from 'tab'.
6046
6047 @retval HA_POS_ERROR For derived tables/views or if an error occur.
6048 @retval 0 If impossible query (i.e. certainly no rows will be
6049 selected.)
6050 */
get_quick_record_count(THD * thd,JOIN_TAB * tab,ha_rows limit)6051 static ha_rows get_quick_record_count(THD *thd, JOIN_TAB *tab, ha_rows limit)
6052 {
6053 DBUG_ENTER("get_quick_record_count");
6054 uchar buff[STACK_BUFF_ALLOC];
6055 if (check_stack_overrun(thd, STACK_MIN_SIZE, buff))
6056 DBUG_RETURN(0); // Fatal error flag is set
6057
6058 TABLE_LIST *const tl= tab->table_ref;
6059
6060 // Derived tables aren't filled yet, so no stats are available.
6061 if (!tl->uses_materialization())
6062 {
6063 QUICK_SELECT_I *qck;
6064 int error= test_quick_select(thd,
6065 tab->const_keys,
6066 0, //empty table_map
6067 limit,
6068 false, //don't force quick range
6069 ORDER::ORDER_NOT_RELEVANT, tab,
6070 tab->join_cond() ? tab->join_cond() :
6071 tab->join()->where_cond,
6072 &tab->needed_reg, &qck, tab->table()->force_index);
6073 tab->set_quick(qck);
6074
6075 if (error == 1)
6076 DBUG_RETURN(qck->records);
6077 if (error == -1)
6078 {
6079 tl->table->reginfo.impossible_range=1;
6080 DBUG_RETURN(0);
6081 }
6082 DBUG_PRINT("warning",("Couldn't use record count on const keypart"));
6083 }
6084 else if (tl->materializable_is_const())
6085 {
6086 DBUG_RETURN(tl->derived_unit()->query_result()->estimated_rowcount);
6087 }
6088 DBUG_RETURN(HA_POS_ERROR);
6089 }
6090
6091 /*
6092 Get estimated record length for semi-join materialization temptable
6093
6094 SYNOPSIS
6095 get_tmp_table_rec_length()
6096 items IN subquery's select list.
6097
6098 DESCRIPTION
6099 Calculate estimated record length for semi-join materialization
6100 temptable. It's an estimate because we don't follow every bit of
6101 create_tmp_table()'s logic. This isn't necessary as the return value of
6102 this function is used only for cost calculations.
6103
6104 RETURN
6105 Length of the temptable record, in bytes
6106 */
6107
get_tmp_table_rec_length(List<Item> & items)6108 static uint get_tmp_table_rec_length(List<Item> &items)
6109 {
6110 uint len= 0;
6111 Item *item;
6112 List_iterator<Item> it(items);
6113 while ((item= it++))
6114 {
6115 switch (item->result_type()) {
6116 case REAL_RESULT:
6117 len += sizeof(double);
6118 break;
6119 case INT_RESULT:
6120 if (item->max_length >= (MY_INT32_NUM_DECIMAL_DIGITS - 1))
6121 len += 8;
6122 else
6123 len += 4;
6124 break;
6125 case STRING_RESULT:
6126 /* DATE/TIME and GEOMETRY fields have STRING_RESULT result type. */
6127 if (item->is_temporal() || item->field_type() == MYSQL_TYPE_GEOMETRY)
6128 len += 8;
6129 else
6130 len += item->max_length;
6131 break;
6132 case DECIMAL_RESULT:
6133 len += 10;
6134 break;
6135 case ROW_RESULT:
6136 default:
6137 assert(0); /* purecov: deadcode */
6138 break;
6139 }
6140 }
6141 return len;
6142 }
6143
6144
6145 /**
6146 Writes to the optimizer trace information about dependencies between
6147 tables.
6148 @param trace optimizer trace
6149 @param join_tabs all JOIN_TABs of the join
6150 @param table_count how many JOIN_TABs in the 'join_tabs' array
6151 */
trace_table_dependencies(Opt_trace_context * trace,JOIN_TAB * join_tabs,uint table_count)6152 static void trace_table_dependencies(Opt_trace_context * trace,
6153 JOIN_TAB *join_tabs,
6154 uint table_count)
6155 {
6156 Opt_trace_object trace_wrapper(trace);
6157 Opt_trace_array trace_dep(trace, "table_dependencies");
6158 for (uint i= 0 ; i < table_count ; i++)
6159 {
6160 TABLE_LIST *table_ref= join_tabs[i].table_ref;
6161 Opt_trace_object trace_one_table(trace);
6162 trace_one_table.add_utf8_table(table_ref).
6163 add("row_may_be_null", table_ref->table->is_nullable());
6164 const table_map map= table_ref->map();
6165 assert(map < (1ULL << table_count));
6166 for (uint j= 0; j < table_count; j++)
6167 {
6168 if (map & (1ULL << j))
6169 {
6170 trace_one_table.add("map_bit", j);
6171 break;
6172 }
6173 }
6174 Opt_trace_array depends_on(trace, "depends_on_map_bits");
6175 // RAND_TABLE_BIT may be in join_tabs[i].dependent, so we test all 64 bits
6176 compile_time_assert(sizeof(table_ref->map()) <= 64);
6177 for (uint j= 0; j < 64; j++)
6178 {
6179 if (join_tabs[i].dependent & (1ULL << j))
6180 depends_on.add(j);
6181 }
6182 }
6183 }
6184
6185
6186 /**
6187 Add to join_tab[i]->condition() "table.field IS NOT NULL" conditions
6188 we've inferred from ref/eq_ref access performed.
6189
6190 This function is a part of "Early NULL-values filtering for ref access"
6191 optimization.
6192
6193 Example of this optimization:
6194 For query SELECT * FROM t1,t2 WHERE t2.key=t1.field @n
6195 and plan " any-access(t1), ref(t2.key=t1.field) " @n
6196 add "t1.field IS NOT NULL" to t1's table condition. @n
6197
6198 Description of the optimization:
6199
6200 We look through equalities choosen to perform ref/eq_ref access,
6201 pick equalities that have form "tbl.part_of_key = othertbl.field"
6202 (where othertbl is a non-const table and othertbl.field may be NULL)
6203 and add them to conditions on correspoding tables (othertbl in this
6204 example).
6205
6206 Exception from that is the case when referred_tab->join != join.
6207 I.e. don't add NOT NULL constraints from any embedded subquery.
6208 Consider this query:
6209 @code
6210 SELECT A.f2 FROM t1 LEFT JOIN t2 A ON A.f2 = f1
6211 WHERE A.f3=(SELECT MIN(f3) FROM t2 C WHERE A.f4 = C.f4) OR A.f3 IS NULL;
6212 @endcode
6213 Here condition A.f3 IS NOT NULL is going to be added to the WHERE
6214 condition of the embedding query.
6215 Another example:
6216 SELECT * FROM t10, t11 WHERE (t10.a < 10 OR t10.a IS NULL)
6217 AND t11.b <=> t10.b AND (t11.a = (SELECT MAX(a) FROM t12
6218 WHERE t12.b = t10.a ));
6219 Here condition t10.a IS NOT NULL is going to be added.
6220 In both cases addition of NOT NULL condition will erroneously reject
6221 some rows of the result set.
6222 referred_tab->join != join constraint would disallow such additions.
6223
6224 This optimization doesn't affect the choices that ref, range, or join
6225 optimizer make. This was intentional because this was added after 4.1
6226 was GA.
6227
6228 Implementation overview
6229 1. update_ref_and_keys() accumulates info about null-rejecting
6230 predicates in in Key_field::null_rejecting
6231 1.1 add_key_part saves these to Key_use.
6232 2. create_ref_for_key copies them to TABLE_REF.
6233 3. add_not_null_conds adds "x IS NOT NULL" to join_tab->m_condition of
6234 appropiate JOIN_TAB members.
6235 */
6236
add_not_null_conds(JOIN * join)6237 static void add_not_null_conds(JOIN *join)
6238 {
6239 DBUG_ENTER("add_not_null_conds");
6240 ASSERT_BEST_REF_IN_JOIN_ORDER(join);
6241 for (uint i=join->const_tables ; i < join->tables ; i++)
6242 {
6243 JOIN_TAB *const tab= join->best_ref[i];
6244 if ((tab->type() == JT_REF || tab->type() == JT_EQ_REF ||
6245 tab->type() == JT_REF_OR_NULL) &&
6246 !tab->table()->is_nullable())
6247 {
6248 for (uint keypart= 0; keypart < tab->ref().key_parts; keypart++)
6249 {
6250 if (tab->ref().null_rejecting & ((key_part_map)1 << keypart))
6251 {
6252 Item *item= tab->ref().items[keypart];
6253 Item *notnull;
6254 Item *real= item->real_item();
6255 assert(real->type() == Item::FIELD_ITEM);
6256 Item_field *not_null_item= (Item_field*)real;
6257 JOIN_TAB *referred_tab= not_null_item->field->table->reginfo.join_tab;
6258 /*
6259 For UPDATE queries such as:
6260 UPDATE t1 SET t1.f2=(SELECT MAX(t2.f4) FROM t2 WHERE t2.f3=t1.f1);
6261 not_null_item is the t1.f1, but it's referred_tab is 0.
6262 */
6263 if (!referred_tab || referred_tab->join() != join)
6264 continue;
6265 if (!(notnull= new Item_func_isnotnull(not_null_item)))
6266 DBUG_VOID_RETURN;
6267 /*
6268 We need to do full fix_fields() call here in order to have correct
6269 notnull->const_item(). This is needed e.g. by test_quick_select
6270 when it is called from make_join_select after this function is
6271 called.
6272 */
6273 if (notnull->fix_fields(join->thd, ¬null))
6274 DBUG_VOID_RETURN;
6275 DBUG_EXECUTE("where",print_where(notnull,
6276 referred_tab->table()->alias,
6277 QT_ORDINARY););
6278 referred_tab->and_with_condition(notnull);
6279 }
6280 }
6281 }
6282 }
6283 DBUG_VOID_RETURN;
6284 }
6285
6286
6287 /**
6288 Check if given expression only uses fields covered by index #keyno in the
6289 table tbl. The expression can use any fields in any other tables.
6290
6291 The expression is guaranteed not to be AND or OR - those constructs are
6292 handled outside of this function.
6293
6294 Restrict some function types from being pushed down to storage engine:
6295 a) Don't push down the triggered conditions. Nested outer joins execution
6296 code may need to evaluate a condition several times (both triggered and
6297 untriggered).
6298 b) Stored functions contain a statement that might start new operations (like
6299 DML statements) from within the storage engine. This does not work against
6300 all SEs.
6301 c) Subqueries might contain nested subqueries and involve more tables.
6302
6303 @param item Expression to check
6304 @param tbl The table having the index
6305 @param keyno The index number
6306 @param other_tbls_ok TRUE <=> Fields of other non-const tables are allowed
6307
6308 @return false if No, true if Yes
6309 */
6310
uses_index_fields_only(Item * item,TABLE * tbl,uint keyno,bool other_tbls_ok)6311 bool uses_index_fields_only(Item *item, TABLE *tbl, uint keyno,
6312 bool other_tbls_ok)
6313 {
6314 // Restrictions b and c.
6315 if (item->has_stored_program() || item->has_subquery())
6316 return false;
6317
6318 if (item->const_item())
6319 return true;
6320
6321 const Item::Type item_type= item->type();
6322
6323 switch (item_type) {
6324 case Item::FUNC_ITEM:
6325 {
6326 Item_func *item_func= (Item_func*)item;
6327 const Item_func::Functype func_type= item_func->functype();
6328
6329 /*
6330 Restriction a.
6331 TODO: Consider cloning the triggered condition and using the copies
6332 for:
6333 1. push the first copy down, to have most restrictive index condition
6334 possible.
6335 2. Put the second copy into tab->m_condition.
6336 */
6337 if (func_type == Item_func::TRIG_COND_FUNC)
6338 return false;
6339
6340 /* This is a function, apply condition recursively to arguments */
6341 if (item_func->argument_count() > 0)
6342 {
6343 Item **item_end= (item_func->arguments()) + item_func->argument_count();
6344 for (Item **child= item_func->arguments(); child != item_end; child++)
6345 {
6346 if (!uses_index_fields_only(*child, tbl, keyno, other_tbls_ok))
6347 return FALSE;
6348 }
6349 }
6350 return TRUE;
6351 }
6352 case Item::COND_ITEM:
6353 {
6354 /*
6355 This is a AND/OR condition. Regular AND/OR clauses are handled by
6356 make_cond_for_index() which will chop off the part that can be
6357 checked with index. This code is for handling non-top-level AND/ORs,
6358 e.g. func(x AND y).
6359 */
6360 List_iterator<Item> li(*((Item_cond*)item)->argument_list());
6361 Item *item;
6362 while ((item=li++))
6363 {
6364 if (!uses_index_fields_only(item, tbl, keyno, other_tbls_ok))
6365 return FALSE;
6366 }
6367 return TRUE;
6368 }
6369 case Item::FIELD_ITEM:
6370 {
6371 Item_field *item_field= (Item_field*)item;
6372 if (item_field->field->table != tbl)
6373 return other_tbls_ok;
6374 /*
6375 The below is probably a repetition - the first part checks the
6376 other two, but let's play it safe:
6377 */
6378 return item_field->field->part_of_key.is_set(keyno) &&
6379 item_field->field->type() != MYSQL_TYPE_GEOMETRY &&
6380 item_field->field->type() != MYSQL_TYPE_BLOB;
6381 }
6382 case Item::REF_ITEM:
6383 return uses_index_fields_only(item->real_item(), tbl, keyno,
6384 other_tbls_ok);
6385 default:
6386 return FALSE; /* Play it safe, don't push unknown non-const items */
6387 }
6388 }
6389
6390
6391 /**
6392 Optimize semi-join nests that could be run with sj-materialization
6393
6394 @param join The join to optimize semi-join nests for
6395
6396 @details
6397 Optimize each of the semi-join nests that can be run with
6398 materialization. For each of the nests, we
6399 - Generate the best join order for this "sub-join" and remember it;
6400 - Remember the sub-join execution cost (it's part of materialization
6401 cost);
6402 - Calculate other costs that will be incurred if we decide
6403 to use materialization strategy for this semi-join nest.
6404
6405 All obtained information is saved and will be used by the main join
6406 optimization pass.
6407
6408 @return false if successful, true if error
6409 */
6410
optimize_semijoin_nests_for_materialization(JOIN * join)6411 static bool optimize_semijoin_nests_for_materialization(JOIN *join)
6412 {
6413 DBUG_ENTER("optimize_semijoin_nests_for_materialization");
6414 List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests);
6415 TABLE_LIST *sj_nest;
6416 Opt_trace_context * const trace= &join->thd->opt_trace;
6417
6418 while ((sj_nest= sj_list_it++))
6419 {
6420 /* As a precaution, reset pointers that were used in prior execution */
6421 sj_nest->nested_join->sjm.positions= NULL;
6422
6423 /* Calculate the cost of materialization if materialization is allowed. */
6424 if (sj_nest->nested_join->sj_enabled_strategies &
6425 OPTIMIZER_SWITCH_MATERIALIZATION)
6426 {
6427 /* A semi-join nest should not contain tables marked as const */
6428 assert(!(sj_nest->sj_inner_tables & join->const_table_map));
6429
6430 Opt_trace_object trace_wrapper(trace);
6431 Opt_trace_object
6432 trace_sjmat(trace, "execution_plan_for_potential_materialization");
6433 Opt_trace_array trace_sjmat_steps(trace, "steps");
6434 /*
6435 Try semijoin materialization if the semijoin is classified as
6436 non-trivially-correlated.
6437 */
6438 if (sj_nest->nested_join->sj_corr_tables)
6439 continue;
6440 /*
6441 Check whether data types allow execution with materialization.
6442 */
6443 semijoin_types_allow_materialization(sj_nest);
6444
6445 if (!sj_nest->nested_join->sjm.scan_allowed &&
6446 !sj_nest->nested_join->sjm.lookup_allowed)
6447 continue;
6448
6449 if (Optimize_table_order(join->thd, join, sj_nest).choose_table_order())
6450 DBUG_RETURN(true);
6451 const uint n_tables= my_count_bits(sj_nest->sj_inner_tables);
6452 calculate_materialization_costs(join, sj_nest, n_tables,
6453 &sj_nest->nested_join->sjm);
6454 /*
6455 Cost data is in sj_nest->nested_join->sjm. We also need to save the
6456 plan:
6457 */
6458 if (!(sj_nest->nested_join->sjm.positions=
6459 (st_position*)join->thd->alloc(sizeof(st_position)*n_tables)))
6460 DBUG_RETURN(true);
6461 memcpy(static_cast<void*>(sj_nest->nested_join->sjm.positions),
6462 join->best_positions + join->const_tables,
6463 sizeof(st_position) * n_tables);
6464 }
6465 }
6466 DBUG_RETURN(false);
6467 }
6468
6469
6470 /*
6471 Check if table's Key_use elements have an eq_ref(outer_tables) candidate
6472
6473 SYNOPSIS
6474 find_eq_ref_candidate()
6475 tl Table to be checked
6476 sj_inner_tables Bitmap of inner tables. eq_ref(inner_table) doesn't
6477 count.
6478
6479 DESCRIPTION
6480 Check if table's Key_use elements have an eq_ref(outer_tables) candidate
6481
6482 TODO
6483 Check again if it is feasible to factor common parts with constant table
6484 search
6485
6486 RETURN
6487 TRUE - There exists an eq_ref(outer-tables) candidate
6488 FALSE - Otherwise
6489 */
6490
find_eq_ref_candidate(TABLE_LIST * tl,table_map sj_inner_tables)6491 static bool find_eq_ref_candidate(TABLE_LIST *tl, table_map sj_inner_tables)
6492 {
6493 Key_use *keyuse= tl->table->reginfo.join_tab->keyuse();
6494
6495 if (keyuse)
6496 {
6497 while (1) /* For each key */
6498 {
6499 const uint key= keyuse->key;
6500 KEY *const keyinfo= tl->table->key_info + key;
6501 key_part_map bound_parts= 0;
6502 if ((keyinfo->flags & (HA_NOSAME)) == HA_NOSAME)
6503 {
6504 do /* For all equalities on all key parts */
6505 {
6506 /* Check if this is "t.keypart = expr(outer_tables) */
6507 if (!(keyuse->used_tables & sj_inner_tables) &&
6508 !(keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL))
6509 {
6510 /*
6511 Consider only if the resulting condition does not pass a NULL
6512 value through. Especially needed for a UNIQUE index on NULLable
6513 columns where a duplicate row is possible with NULL values.
6514 */
6515 if (keyuse->null_rejecting || !keyuse->val->maybe_null ||
6516 !keyinfo->key_part[keyuse->keypart].field->maybe_null())
6517 bound_parts|= (key_part_map)1 << keyuse->keypart;
6518 }
6519 keyuse++;
6520 } while (keyuse->key == key && keyuse->table_ref == tl);
6521
6522 if (bound_parts == LOWER_BITS(uint, keyinfo->user_defined_key_parts))
6523 return true;
6524 if (keyuse->table_ref != tl)
6525 return false;
6526 }
6527 else
6528 {
6529 do
6530 {
6531 keyuse++;
6532 if (keyuse->table_ref != tl)
6533 return false;
6534 }
6535 while (keyuse->key == key);
6536 }
6537 }
6538 }
6539 return false;
6540 }
6541
6542
6543 /**
6544 Pull tables out of semi-join nests based on functional dependencies
6545
6546 @param join The join where to do the semi-join table pullout
6547
6548 @return False if successful, true if error (Out of memory)
6549
6550 @details
6551 Pull tables out of semi-join nests based on functional dependencies,
6552 ie. if a table is accessed via eq_ref(outer_tables).
6553 The function may be called several times, the caller is responsible
6554 for setting up proper key information that this function acts upon.
6555
6556 PRECONDITIONS
6557 When this function is called, the join may have several semi-join nests
6558 but it is guaranteed that one semi-join nest does not contain another.
6559 For functionally dependent tables to be pulled out, key information must
6560 have been calculated (see update_ref_and_keys()).
6561
6562 POSTCONDITIONS
6563 * Tables that were pulled out are removed from the semi-join nest they
6564 belonged to and added to the parent join nest.
6565 * For these tables, the used_tables and not_null_tables fields of
6566 the semi-join nest they belonged to will be adjusted.
6567 The semi-join nest is also marked as correlated, and
6568 sj_corr_tables and sj_depends_on are adjusted if necessary.
6569 * Semi-join nests' sj_inner_tables is set equal to used_tables
6570
6571 NOTE
6572 Table pullout may make uncorrelated subquery correlated. Consider this
6573 example:
6574
6575 ... WHERE oe IN (SELECT it1.primary_key WHERE p(it1, it2) ... )
6576
6577 here table it1 can be pulled out (we have it1.primary_key=oe which gives
6578 us functional dependency). Once it1 is pulled out, all references to it1
6579 from p(it1, it2) become references to outside of the subquery and thus
6580 make the subquery (i.e. its semi-join nest) correlated.
6581 Making the subquery (i.e. its semi-join nest) correlated prevents us from
6582 using Materialization or LooseScan to execute it.
6583 */
6584
pull_out_semijoin_tables(JOIN * join)6585 static bool pull_out_semijoin_tables(JOIN *join)
6586 {
6587 TABLE_LIST *sj_nest;
6588 DBUG_ENTER("pull_out_semijoin_tables");
6589
6590 assert(!join->select_lex->sj_nests.is_empty());
6591
6592 List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests);
6593 Opt_trace_context * const trace= &join->thd->opt_trace;
6594 Opt_trace_object trace_wrapper(trace);
6595 Opt_trace_array trace_pullout(trace, "pulled_out_semijoin_tables");
6596
6597 /* Try pulling out tables from each semi-join nest */
6598 while ((sj_nest= sj_list_it++))
6599 {
6600 table_map pulled_tables= 0;
6601 List_iterator<TABLE_LIST> child_li(sj_nest->nested_join->join_list);
6602 TABLE_LIST *tbl;
6603 /*
6604 Calculate set of tables within this semi-join nest that have
6605 other dependent tables
6606 */
6607 table_map dep_tables= 0;
6608 while ((tbl= child_li++))
6609 {
6610 TABLE *const table= tbl->table;
6611 if (table &&
6612 (table->reginfo.join_tab->dependent &
6613 sj_nest->nested_join->used_tables))
6614 dep_tables|= table->reginfo.join_tab->dependent;
6615 }
6616 /*
6617 Find which tables we can pull out based on key dependency data.
6618 Note that pulling one table out can allow us to pull out some
6619 other tables too.
6620 */
6621 bool pulled_a_table;
6622 do
6623 {
6624 pulled_a_table= FALSE;
6625 child_li.rewind();
6626 while ((tbl= child_li++))
6627 {
6628 if (tbl->table &&
6629 !(pulled_tables & tbl->map()) &&
6630 !(dep_tables & tbl->map()))
6631 {
6632 if (find_eq_ref_candidate(tbl,
6633 sj_nest->nested_join->used_tables &
6634 ~pulled_tables))
6635 {
6636 pulled_a_table= TRUE;
6637 pulled_tables |= tbl->map();
6638 Opt_trace_object(trace).add_utf8_table(tbl).
6639 add("functionally_dependent", true);
6640 /*
6641 Pulling a table out of uncorrelated subquery in general makes
6642 it correlated. See the NOTE to this function.
6643 */
6644 sj_nest->nested_join->sj_corr_tables|= tbl->map();
6645 sj_nest->nested_join->sj_depends_on|= tbl->map();
6646 }
6647 }
6648 }
6649 } while (pulled_a_table);
6650
6651 child_li.rewind();
6652 /*
6653 Move the pulled out TABLE_LIST elements to the parents.
6654 */
6655 sj_nest->nested_join->used_tables&= ~pulled_tables;
6656 sj_nest->nested_join->not_null_tables&= ~pulled_tables;
6657
6658 /* sj_inner_tables is a copy of nested_join->used_tables */
6659 sj_nest->sj_inner_tables= sj_nest->nested_join->used_tables;
6660
6661 if (pulled_tables)
6662 {
6663 List<TABLE_LIST> *upper_join_list= (sj_nest->embedding != NULL) ?
6664 &sj_nest->embedding->nested_join->join_list :
6665 &join->select_lex->top_join_list;
6666
6667 Prepared_stmt_arena_holder ps_arena_holder(join->thd);
6668
6669 while ((tbl= child_li++))
6670 {
6671 if (tbl->table &&
6672 !(sj_nest->nested_join->used_tables & tbl->map()))
6673 {
6674 /*
6675 Pull the table up in the same way as simplify_joins() does:
6676 update join_list and embedding pointers but keep next[_local]
6677 pointers.
6678 */
6679 child_li.remove();
6680
6681 if (upper_join_list->push_back(tbl))
6682 DBUG_RETURN(TRUE);
6683
6684 tbl->join_list= upper_join_list;
6685 tbl->embedding= sj_nest->embedding;
6686 }
6687 }
6688
6689 /* Remove the sj-nest itself if we've removed everything from it */
6690 if (!sj_nest->nested_join->used_tables)
6691 {
6692 List_iterator<TABLE_LIST> li(*upper_join_list);
6693 /* Find the sj_nest in the list. */
6694 while (sj_nest != li++)
6695 {}
6696 li.remove();
6697 /* Also remove it from the list of SJ-nests: */
6698 sj_list_it.remove();
6699 }
6700 }
6701 }
6702 DBUG_RETURN(FALSE);
6703 }
6704
6705
6706 /**
6707 @defgroup RefOptimizerModule Ref Optimizer
6708
6709 @{
6710
6711 This module analyzes all equality predicates to determine the best
6712 independent ref/eq_ref/ref_or_null index access methods.
6713
6714 The 'ref' optimizer determines the columns (and expressions over them) that
6715 reference columns in other tables via an equality, and analyzes which keys
6716 and key parts can be used for index lookup based on these references. The
6717 main outcomes of the 'ref' optimizer are:
6718
6719 - A bi-directional graph of all equi-join conditions represented as an
6720 array of Key_use elements. This array is stored in JOIN::keyuse_array in
6721 table, key, keypart order. Each JOIN_TAB::keyuse points to the
6722 first Key_use element with the same table as JOIN_TAB::table.
6723
6724 - The table dependencies needed by the optimizer to determine what
6725 tables must be before certain table so that they provide the
6726 necessary column bindings for the equality predicates.
6727
6728 - Computed properties of the equality predicates such as null_rejecting
6729 and the result size of each separate condition.
6730
6731 Updates in JOIN_TAB:
6732 - JOIN_TAB::keys Bitmap of all used keys.
6733 - JOIN_TAB::const_keys Bitmap of all keys that may be used with quick_select.
6734 - JOIN_TAB::keyuse Pointer to possible keys.
6735 */
6736
6737 /**
6738 A Key_field is a descriptor of a predicate of the form (column <op> val).
6739 Currently 'op' is one of {'=', '<=>', 'IS [NOT] NULL', 'arg1 IN arg2'},
6740 and 'val' can be either another column or an expression (including constants).
6741
6742 Key_field's are used to analyze columns that may potentially serve as
6743 parts of keys for index lookup. If 'field' is part of an index, then
6744 add_key_part() creates a corresponding Key_use object and inserts it
6745 into the JOIN::keyuse_array which is passed by update_ref_and_keys().
6746
6747 The structure is used only during analysis of the candidate columns for
6748 index 'ref' access.
6749 */
6750 struct Key_field {
Key_fieldKey_field6751 Key_field(Item_field *item_field, Item *val, uint level,
6752 uint optimize, bool eq_func,
6753 bool null_rejecting, bool *cond_guard, uint sj_pred_no)
6754 : item_field(item_field), val(val), level(level),
6755 optimize(optimize), eq_func(eq_func),
6756 null_rejecting(null_rejecting), cond_guard(cond_guard),
6757 sj_pred_no(sj_pred_no)
6758 {}
6759 Item_field *item_field; ///< Item representing the column
6760 Item *val; ///< May be empty if diff constant
6761 uint level;
6762 uint optimize; ///< KEY_OPTIMIZE_*
6763 bool eq_func;
6764 /**
6765 If true, the condition this struct represents will not be satisfied
6766 when val IS NULL.
6767 @sa Key_use::null_rejecting .
6768 */
6769 bool null_rejecting;
6770 bool *cond_guard; ///< @sa Key_use::cond_guard
6771 uint sj_pred_no; ///< @sa Key_use::sj_pred_no
6772 };
6773
/*
  Values in optimize: bit flags stored in Key_field::optimize (and tested
  on Key_use::optimize by find_eq_ref_candidate()).
*/
/*
  Set by add_key_field() for 'column IS NULL' on a NOT NULL column of a
  nullable table, so that the not exists optimization can be done later.
*/
#define KEY_OPTIMIZE_EXISTS 1
/*
  Set by merge_key_fields() when 'column = expr' and 'column IS NULL' have
  been merged into one test (ref_or_null).
*/
#define KEY_OPTIMIZE_REF_OR_NULL 2
6777
6778 /**
6779 Merge new key definitions to old ones, remove those not used in both.
6780
6781 This is called for OR between different levels.
6782
6783 To be able to do 'ref_or_null' we merge a comparison of a column
6784 and 'column IS NULL' to one test. This is useful for sub select queries
6785 that are internally transformed to something like:.
6786
6787 @code
6788 SELECT * FROM t1 WHERE t1.key=outer_ref_field or t1.key IS NULL
6789 @endcode
6790
6791 Key_field::null_rejecting is processed as follows: @n
6792 result has null_rejecting=true if it is set for both ORed references.
6793 for example:
6794 - (t2.key = t1.field OR t2.key = t1.field) -> null_rejecting=true
6795 - (t2.key = t1.field OR t2.key <=> t1.field) -> null_rejecting=false
6796
6797 @todo
6798 The result of this is that we're missing some 'ref' accesses.
6799 OptimizerTeam: Fix this
6800 */
6801
6802 static Key_field *
merge_key_fields(Key_field * start,Key_field * new_fields,Key_field * end,uint and_level)6803 merge_key_fields(Key_field *start, Key_field *new_fields, Key_field *end,
6804 uint and_level)
6805 {
6806 if (start == new_fields)
6807 return start; // Impossible or
6808 if (new_fields == end)
6809 return start; // No new fields, skip all
6810
6811 Key_field *first_free=new_fields;
6812
6813 /* Mark all found fields in old array */
6814 for (; new_fields != end ; new_fields++)
6815 {
6816 Field *const new_field= new_fields->item_field->field;
6817
6818 for (Key_field *old=start ; old != first_free ; old++)
6819 {
6820 Field *const old_field= old->item_field->field;
6821
6822 /*
6823 Check that the Field objects are the same, as we may have several
6824 Item_field objects pointing to the same Field:
6825 */
6826 if (old_field == new_field)
6827 {
6828 /*
6829 NOTE: below const_item() call really works as "!used_tables()", i.e.
6830 it can return FALSE where it is feasible to make it return TRUE.
6831
6832 The cause is as follows: Some of the tables are already known to be
6833 const tables (the detection code is in JOIN::make_join_plan(),
6834 above the update_ref_and_keys() call), but we didn't propagate
6835 information about this: TABLE::const_table is not set to TRUE, and
6836 Item::update_used_tables() hasn't been called for each item.
6837 The result of this is that we're missing some 'ref' accesses.
6838 TODO: OptimizerTeam: Fix this
6839 */
6840 if (!new_fields->val->const_item())
6841 {
6842 /*
6843 If the value matches, we can use the key reference.
6844 If not, we keep it until we have examined all new values
6845 */
6846 if (old->val->eq(new_fields->val, old_field->binary()))
6847 {
6848 old->level= and_level;
6849 old->optimize= ((old->optimize & new_fields->optimize &
6850 KEY_OPTIMIZE_EXISTS) |
6851 ((old->optimize | new_fields->optimize) &
6852 KEY_OPTIMIZE_REF_OR_NULL));
6853 old->null_rejecting= (old->null_rejecting &&
6854 new_fields->null_rejecting);
6855 }
6856 }
6857 else if (old->eq_func && new_fields->eq_func &&
6858 old->val->eq_by_collation(new_fields->val,
6859 old_field->binary(),
6860 old_field->charset()))
6861 {
6862 old->level= and_level;
6863 old->optimize= ((old->optimize & new_fields->optimize &
6864 KEY_OPTIMIZE_EXISTS) |
6865 ((old->optimize | new_fields->optimize) &
6866 KEY_OPTIMIZE_REF_OR_NULL));
6867 old->null_rejecting= (old->null_rejecting &&
6868 new_fields->null_rejecting);
6869 }
6870 else if (old->eq_func && new_fields->eq_func &&
6871 ((old->val->const_item() && old->val->is_null()) ||
6872 new_fields->val->is_null()))
6873 {
6874 /* field = expression OR field IS NULL */
6875 old->level= and_level;
6876 old->optimize= KEY_OPTIMIZE_REF_OR_NULL;
6877 /*
6878 Remember the NOT NULL value unless the value does not depend
6879 on other tables.
6880 */
6881 if (!old->val->used_tables() && old->val->is_null())
6882 old->val= new_fields->val;
6883 /* The referred expression can be NULL: */
6884 old->null_rejecting= 0;
6885 }
6886 else
6887 {
6888 /*
6889 We are comparing two different const. In this case we can't
6890 use a key-lookup on this so it's better to remove the value
6891 and let the range optimizer handle it
6892 */
6893 if (old == --first_free) // If last item
6894 break;
6895 *old= *first_free; // Remove old value
6896 old--; // Retry this value
6897 }
6898 }
6899 }
6900 }
6901 /* Remove all not used items */
6902 for (Key_field *old=start ; old != first_free ;)
6903 {
6904 if (old->level != and_level)
6905 { // Not used in all levels
6906 if (old == --first_free)
6907 break;
6908 *old= *first_free; // Remove old value
6909 continue;
6910 }
6911 old++;
6912 }
6913 return first_free;
6914 }
6915
6916
6917 /**
6918 Given a field, return its index in semi-join's select list, or UINT_MAX
6919
6920 @param item_field Field to be looked up in select list
6921
6922 @retval =UINT_MAX Field is not from a semijoin-transformed subquery
6923 @retval <UINT_MAX Index in select list of subquery
6924
6925 @details
6926 Given a field, find its table; then see if the table is within a
6927 semi-join nest and if the field was in select list of the subquery
6928 (if subquery was part of a quantified comparison predicate), or
6929 the field was a result of subquery decorrelation.
6930 If it was, then return the field's index in the select list.
6931 The value is used by LooseScan strategy.
6932 */
6933
get_semi_join_select_list_index(Item_field * item_field)6934 static uint get_semi_join_select_list_index(Item_field *item_field)
6935 {
6936 TABLE_LIST *emb_sj_nest= item_field->table_ref->embedding;
6937 if (emb_sj_nest && emb_sj_nest->sj_cond())
6938 {
6939 List<Item> &items= emb_sj_nest->nested_join->sj_inner_exprs;
6940 List_iterator<Item> it(items);
6941 for (uint i= 0; i < items.elements; i++)
6942 {
6943 Item *sel_item= it++;
6944 if (sel_item->type() == Item::FIELD_ITEM &&
6945 ((Item_field*)sel_item)->field->eq(item_field->field))
6946 return i;
6947 }
6948 }
6949 return UINT_MAX;
6950 }
6951
6952 /**
6953 @brief
6954 If EXPLAIN EXTENDED or if the --safe-updates option is enabled, add a
6955 warning that an index cannot be used for ref access
6956
6957 @details
6958 If EXPLAIN EXTENDED or if the --safe-updates option is enabled, add a
6959 warning for each index that cannot be used for ref access due to either type
6960 conversion or different collations on the field used for comparison
6961
6962 Example type conversion (char compared to int):
6963
6964 CREATE TABLE t1 (url char(1) PRIMARY KEY);
6965 SELECT * FROM t1 WHERE url=1;
6966
6967 Example different collations (danish vs german2):
6968
6969 CREATE TABLE t1 (url char(1) PRIMARY KEY) collate latin1_danish_ci;
6970 SELECT * FROM t1 WHERE url='1' collate latin1_german2_ci;
6971
6972 @param thd Thread for the connection that submitted the query
6973 @param field Field used in comparision
6974 @param cant_use_index Indexes that cannot be used for lookup
6975 */
6976 static void
warn_index_not_applicable(THD * thd,const Field * field,const key_map cant_use_index)6977 warn_index_not_applicable(THD *thd, const Field *field,
6978 const key_map cant_use_index)
6979 {
6980 if (thd->lex->describe ||
6981 thd->variables.option_bits & OPTION_SAFE_UPDATES)
6982 for (uint j=0 ; j < field->table->s->keys ; j++)
6983 if (cant_use_index.is_set(j))
6984 push_warning_printf(thd,
6985 Sql_condition::SL_WARNING,
6986 ER_WARN_INDEX_NOT_APPLICABLE,
6987 ER(ER_WARN_INDEX_NOT_APPLICABLE),
6988 "ref",
6989 field->table->key_info[j].name,
6990 field->field_name);
6991 }
6992
6993 /**
6994 Add a possible key to array of possible keys if it's usable as a key
6995
6996 @param key_fields[in,out] Used as an input paramater in the sense that it is a
6997 pointer to a pointer to a memory area where an array of Key_field objects will
6998 stored. It is used as an out parameter in the sense that the pointer will be
6999 updated to point beyond the last Key_field written.
7000
7001 @param and_level And level, to be stored in Key_field
7002 @param cond Condition predicate
7003 @param field Field used in comparision
7004 @param eq_func True if we used =, <=> or IS NULL
7005 @param value Array of values used for comparison with field
7006 @param num_values Number of elements in the array of values
7007 @param usable_tables Tables which can be used for key optimization
7008 @param sargables IN/OUT Array of found sargable candidates. Will be
7009 ignored in case eq_func is true.
7010
7011 @note
7012 If we are doing a NOT NULL comparison on a NOT NULL field in a outer join
7013 table, we store this to be able to do not exists optimization later.
7014
7015 @return
7016 *key_fields is incremented if we stored a key in the array
7017 */
7018
7019 static void
add_key_field(Key_field ** key_fields,uint and_level,Item_func * cond,Item_field * item_field,bool eq_func,Item ** value,uint num_values,table_map usable_tables,SARGABLE_PARAM ** sargables)7020 add_key_field(Key_field **key_fields, uint and_level, Item_func *cond,
7021 Item_field *item_field, bool eq_func, Item **value,
7022 uint num_values, table_map usable_tables,
7023 SARGABLE_PARAM **sargables)
7024 {
7025 assert(eq_func || sargables);
7026
7027 Field *const field= item_field->field;
7028 TABLE_LIST *const tl= item_field->table_ref;
7029
7030 if (tl->table->reginfo.join_tab == NULL)
7031 {
7032 /*
7033 Due to a bug in IN-to-EXISTS (grep for real_item() in item_subselect.cc
7034 for more info), an index over a field from an outer query might be
7035 considered here, which is incorrect. Their query has been fully
7036 optimized already so their reginfo.join_tab is NULL and we reject them.
7037 */
7038 return;
7039 }
7040
7041 DBUG_PRINT("info", ("add_key_field for field %s", field->field_name));
7042 uint exists_optimize= 0;
7043 if (!tl->derived_keys_ready && tl->uses_materialization() &&
7044 !tl->table->is_created() &&
7045 tl->update_derived_keys(field, value, num_values))
7046 return;
7047 if (!(field->flags & PART_KEY_FLAG))
7048 {
7049 // Don't remove column IS NULL on a LEFT JOIN table
7050 if (!eq_func || (*value)->type() != Item::NULL_ITEM ||
7051 !tl->table->is_nullable() || field->real_maybe_null())
7052 return; // Not a key. Skip it
7053 exists_optimize= KEY_OPTIMIZE_EXISTS;
7054 assert(num_values == 1);
7055 }
7056 else
7057 {
7058 table_map used_tables= 0;
7059 bool optimizable= false;
7060 for (uint i=0; i<num_values; i++)
7061 {
7062 used_tables|=(value[i])->used_tables();
7063 if (!((value[i])->used_tables() & (tl->map() | RAND_TABLE_BIT)))
7064 optimizable= true;
7065 }
7066 if (!optimizable)
7067 return;
7068 if (!(usable_tables & tl->map()))
7069 {
7070 if (!eq_func || (*value)->type() != Item::NULL_ITEM ||
7071 !tl->table->is_nullable() || field->real_maybe_null())
7072 return; // Can't use left join optimize
7073 exists_optimize= KEY_OPTIMIZE_EXISTS;
7074 }
7075 else
7076 {
7077 JOIN_TAB *stat= tl->table->reginfo.join_tab;
7078 key_map possible_keys=field->key_start;
7079 possible_keys.intersect(tl->table->keys_in_use_for_query);
7080 stat[0].keys().merge(possible_keys); // Add possible keys
7081
7082 /*
7083 Save the following cases:
7084 Field op constant
7085 Field LIKE constant where constant doesn't start with a wildcard
7086 Field = field2 where field2 is in a different table
7087 Field op formula
7088 Field IS NULL
7089 Field IS NOT NULL
7090 Field BETWEEN ...
7091 Field IN ...
7092 */
7093 stat[0].key_dependent|=used_tables;
7094
7095 bool is_const= true;
7096 for (uint i=0; i<num_values; i++)
7097 {
7098 if (!(is_const&= value[i]->const_item()))
7099 break;
7100 }
7101 if (is_const)
7102 stat[0].const_keys.merge(possible_keys);
7103 else if (!eq_func)
7104 {
7105 /*
7106 Save info to be able check whether this predicate can be
7107 considered as sargable for range analysis after reading const tables.
7108 We do not save info about equalities as update_const_equal_items
7109 will take care of updating info on keys from sargable equalities.
7110 */
7111 assert(sargables);
7112 (*sargables)--;
7113 /*
7114 The sargables and key_fields arrays share the same memory
7115 buffer, and grow from opposite directions, so make sure they
7116 don't cross.
7117 */
7118 assert(*sargables > *reinterpret_cast<SARGABLE_PARAM**>(key_fields));
7119 (*sargables)->field= field;
7120 (*sargables)->arg_value= value;
7121 (*sargables)->num_values= num_values;
7122 }
7123 /*
7124 We can't always use indexes when comparing a string index to a
7125 number. cmp_type() is checked to allow compare of dates to numbers.
7126 eq_func is NEVER true when num_values > 1
7127 */
7128 if (!eq_func)
7129 return;
7130
7131 /*
7132 Check if the field and value are comparable in the index.
7133 @todo: This code is almost identical to comparable_in_index()
7134 in opt_range.cc. Consider replacing the checks below with a
7135 function call to comparable_in_index()
7136 */
7137 if (field->result_type() == STRING_RESULT)
7138 {
7139 if ((*value)->result_type() != STRING_RESULT)
7140 {
7141 if (field->cmp_type() != (*value)->result_type())
7142 {
7143 warn_index_not_applicable(stat->join()->thd, field, possible_keys);
7144 return;
7145 }
7146 }
7147 else
7148 {
7149 /*
7150 Can't optimize datetime_column=indexed_varchar_column,
7151 also can't use indexes if the effective collation
7152 of the operation differ from the field collation.
7153 IndexedTimeComparedToDate: can't optimize
7154 'indexed_time = temporal_expr_with_date_part' because:
7155 - without index, a TIME column with value '48:00:00' is equal to a
7156 DATETIME column with value 'CURDATE() + 2 days'
7157 - with ref access into the TIME column, CURDATE() + 2 days becomes
7158 "00:00:00" (Field_timef::store_internal() simply extracts the time
7159 part from the datetime) which is a lookup key which does not match
7160 "48:00:00"; so ref access is not be able to give the same result
7161 as without index, so is disabled.
7162 On the other hand, we can optimize indexed_datetime = time
7163 because Field_temporal_with_date::store_time() will convert
7164 48:00:00 to CURDATE() + 2 days which is the correct lookup key.
7165 */
7166 if ((!field->is_temporal() && value[0]->is_temporal()) ||
7167 (field->cmp_type() == STRING_RESULT &&
7168 field->charset() != cond->compare_collation()) ||
7169 field_time_cmp_date(field, value[0]))
7170 {
7171 warn_index_not_applicable(stat->join()->thd, field, possible_keys);
7172 return;
7173 }
7174 }
7175 }
7176
7177 /*
7178 We can't use indexes when comparing to a JSON value. For example,
7179 the string '{}' should compare equal to the JSON string "{}". If
7180 we use a string index to compare the two strings, we will be
7181 comparing '{}' and '"{}"', which don't compare equal.
7182 */
7183 if (value[0]->result_type() == STRING_RESULT &&
7184 value[0]->field_type() == MYSQL_TYPE_JSON)
7185 {
7186 warn_index_not_applicable(stat->join()->thd, field, possible_keys);
7187 return;
7188 }
7189 }
7190 }
7191 /*
7192 For the moment eq_func is always true. This slot is reserved for future
7193 extensions where we want to remembers other things than just eq comparisons
7194 */
7195 assert(eq_func);
7196 /*
7197 If the condition has form "tbl.keypart = othertbl.field" and
7198 othertbl.field can be NULL, there will be no matches if othertbl.field
7199 has NULL value.
7200 We use null_rejecting in add_not_null_conds() to add
7201 'othertbl.field IS NOT NULL' to tab->m_condition, if this is not an outer
7202 join. We also use it to shortcut reading "tbl" when othertbl.field is
7203 found to be a NULL value (in join_read_always_key() and BKA).
7204 */
7205 Item *const real= (*value)->real_item();
7206 const bool null_rejecting=
7207 ((cond->functype() == Item_func::EQ_FUNC) ||
7208 (cond->functype() == Item_func::MULT_EQUAL_FUNC)) &&
7209 (real->type() == Item::FIELD_ITEM) &&
7210 ((Item_field*)real)->field->maybe_null();
7211
7212 /* Store possible eq field */
7213 new (*key_fields)
7214 Key_field(item_field, *value, and_level, exists_optimize, eq_func,
7215 null_rejecting, NULL,
7216 get_semi_join_select_list_index(item_field));
7217 (*key_fields)++;
7218 /*
7219 The sargables and key_fields arrays share the same memory buffer,
7220 and grow from opposite directions, so make sure they don't
7221 cross. But if sargables was NULL, eq_func had to be true and we
7222 don't write any sargables.
7223 */
7224 assert(sargables == NULL ||
7225 *key_fields < *reinterpret_cast<Key_field**>(sargables));
7226 }
7227
7228 /**
7229 Add possible keys to array of possible keys originated from a simple
7230 predicate.
7231
7232 @param key_fields Pointer to add key, if usable
7233 @param and_level And level, to be stored in Key_field
7234 @param cond Condition predicate
7235 @param field_item Field used in comparision
7236 @param eq_func True if we used =, <=> or IS NULL
7237 @param val Value used for comparison with field
7238 Is NULL for BETWEEN and IN
7239 @param usable_tables Tables which can be used for key optimization
7240 @param sargables IN/OUT Array of found sargable candidates
7241
7242 @note
7243 If field items f1 and f2 belong to the same multiple equality and
7244 a key is added for f1, the the same key is added for f2.
7245
7246 @returns
7247 *key_fields is incremented if we stored a key in the array
7248 */
7249
7250 static void
add_key_equal_fields(Key_field ** key_fields,uint and_level,Item_func * cond,Item_field * field_item,bool eq_func,Item ** val,uint num_values,table_map usable_tables,SARGABLE_PARAM ** sargables)7251 add_key_equal_fields(Key_field **key_fields, uint and_level,
7252 Item_func *cond, Item_field *field_item,
7253 bool eq_func, Item **val,
7254 uint num_values, table_map usable_tables,
7255 SARGABLE_PARAM **sargables)
7256 {
7257 DBUG_ENTER("add_key_equal_fields");
7258
7259 add_key_field(key_fields, and_level, cond, field_item,
7260 eq_func, val, num_values, usable_tables, sargables);
7261 Item_equal *item_equal= field_item->item_equal;
7262 if (item_equal)
7263 {
7264 /*
7265 Add to the set of possible key values every substitution of
7266 the field for an equal field included into item_equal
7267 */
7268 Item_equal_iterator it(*item_equal);
7269 Item_field *item;
7270 while ((item= it++))
7271 {
7272 if (!field_item->field->eq(item->field))
7273 add_key_field(key_fields, and_level, cond, item,
7274 eq_func, val, num_values, usable_tables,
7275 sargables);
7276 }
7277 }
7278 DBUG_VOID_RETURN;
7279 }
7280
7281
7282 /**
7283 Check if an expression is a non-outer field.
7284
7285 Checks if an expression is a field and belongs to the current select.
7286
7287 @param field Item expression to check
7288
7289 @return boolean
7290 @retval TRUE the expression is a local field
7291 @retval FALSE it's something else
7292 */
7293
7294 static bool
is_local_field(Item * field)7295 is_local_field (Item *field)
7296 {
7297 return field->real_item()->type() == Item::FIELD_ITEM &&
7298 !(field->used_tables() & OUTER_REF_TABLE_BIT) &&
7299 !down_cast<Item_ident *>(field)->depended_from &&
7300 !down_cast<Item_ident *>(field->real_item())->depended_from;
7301 }
7302
7303
7304 /**
7305 Check if a row constructor expression is over columns in the same query block.
7306
7307 @param item_row Row expression to check.
7308
7309 @return boolean
7310 @retval true The expression is a local column reference.
7311 @retval false It's something else.
7312 */
is_row_of_local_columns(Item_row * item_row)7313 static bool is_row_of_local_columns(Item_row *item_row)
7314 {
7315 for (uint i= 0; i < item_row->cols(); ++i)
7316 if (!is_local_field(item_row->element_index(i)))
7317 return false;
7318 return true;
7319 }
7320
7321
7322 /**
7323 The guts of the ref optimizer. This function, along with the other
7324 add_key_* functions, make up a recursive procedure that analyzes a
7325 condition expression (a tree of AND and OR predicates) and does
7326 many things.
7327
7328 @param join The query block involving the condition.
7329
7330 @param key_fields[in,out] Start of memory buffer, see below.
7331 @param and_level[in, out] Current 'and level', see below.
7332 @param cond The conditional expression to analyze.
7333 @param usable_tables Tables not in this bitmap will not be examined.
7334 @param sargables [in,out] End of memory buffer, see below.
7335
7336 This documentation is the result of reverse engineering and may
7337 therefore not capture the full gist of the procedure, but it is
7338 known to do the following:
7339
  - Populate a raw memory buffer from two directions at the same time. An
    'array' of Key_field objects fills the buffer from low to high addresses
    whilst an 'array' of SARGABLE_PARAM's fills the buffer from high to low
    addresses. At the first call to this function, it is assumed that
    key_fields points to the beginning of the buffer and sargables points to
    the end (except for a poor man's 'null element' at the very end).
7346
7347 - Update a number of properties in the JOIN_TAB's that can be used
7348 to find search keys (sargables).
7349
7350 - JOIN_TAB::keys
7351 - JOIN_TAB::key_dependent
7352 - JOIN_TAB::const_keys (dictates if the range optimizer will be run
7353 later.)
7354
7355 The Key_field objects are marked with something called an 'and_level', which
7356 does @b not correspond to their nesting depth within the expression tree. It
7357 is rather a tag to group conjunctions together. For instance, in the
7358 conditional expression
7359
7360 @code
7361 a = 0 AND b = 0
7362 @endcode
7363
7364 two Key_field's are produced, both having an and_level of 0.
7365
7366 In an expression such as
7367
7368 @code
7369 a = 0 AND b = 0 OR a = 1
7370 @endcode
7371
7372 three Key_field's are produced, the first two corresponding to 'a = 0' and
7373 'b = 0', respectively, both with and_level 0. The third one corresponds to
7374 'a = 1' and has an and_level of 1.
7375
  A separate function, merge_key_fields(), performs ref access validation on
  the Key_field array on the recursive ascent. If some Key_field's cannot be
  used for ref access, the key_fields pointer is rolled back. All other
  modifications to the query plan remain.
7380 */
7381 static void
add_key_fields(JOIN * join,Key_field ** key_fields,uint * and_level,Item * cond,table_map usable_tables,SARGABLE_PARAM ** sargables)7382 add_key_fields(JOIN *join, Key_field **key_fields, uint *and_level,
7383 Item *cond, table_map usable_tables,
7384 SARGABLE_PARAM **sargables)
7385 {
7386 DBUG_ENTER("add_key_fields");
7387 if (cond->type() == Item_func::COND_ITEM)
7388 {
7389 List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
7390 Key_field *org_key_fields= *key_fields;
7391
7392 if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
7393 {
7394 Item *item;
7395 while ((item=li++))
7396 add_key_fields(join, key_fields, and_level, item, usable_tables,
7397 sargables);
7398 for (; org_key_fields != *key_fields ; org_key_fields++)
7399 org_key_fields->level= *and_level;
7400 }
7401 else
7402 {
7403 (*and_level)++;
7404 add_key_fields(join, key_fields, and_level, li++, usable_tables,
7405 sargables);
7406 Item *item;
7407 while ((item=li++))
7408 {
7409 Key_field *start_key_fields= *key_fields;
7410 (*and_level)++;
7411 add_key_fields(join, key_fields, and_level, item, usable_tables,
7412 sargables);
7413 *key_fields=merge_key_fields(org_key_fields,start_key_fields,
7414 *key_fields,++(*and_level));
7415 }
7416 }
7417 DBUG_VOID_RETURN;
7418 }
7419
7420 /*
7421 Subquery optimization: Conditions that are pushed down into subqueries
7422 are wrapped into Item_func_trig_cond. We process the wrapped condition
7423 but need to set cond_guard for Key_use elements generated from it.
7424 */
7425 {
7426 if (cond->type() == Item::FUNC_ITEM &&
7427 ((Item_func*)cond)->functype() == Item_func::TRIG_COND_FUNC)
7428 {
7429 Item *cond_arg= ((Item_func*)cond)->arguments()[0];
7430 if (!join->group_list && !join->order &&
7431 join->unit->item &&
7432 join->unit->item->substype() == Item_subselect::IN_SUBS &&
7433 !join->unit->is_union())
7434 {
7435 Key_field *save= *key_fields;
7436 add_key_fields(join, key_fields, and_level, cond_arg, usable_tables,
7437 sargables);
7438 // Indicate that this ref access candidate is for subquery lookup:
7439 for (; save != *key_fields; save++)
7440 save->cond_guard= ((Item_func_trig_cond*)cond)->get_trig_var();
7441 }
7442 DBUG_VOID_RETURN;
7443 }
7444 }
7445
7446 /* If item is of type 'field op field/constant' add it to key_fields */
7447 if (cond->type() != Item::FUNC_ITEM)
7448 DBUG_VOID_RETURN;
7449 Item_func *cond_func= (Item_func*) cond;
7450 switch (cond_func->select_optimize()) {
7451 case Item_func::OPTIMIZE_NONE:
7452 break;
7453 case Item_func::OPTIMIZE_KEY:
7454 {
7455 Item **values;
7456 /*
7457 Build list of possible keys for 'a BETWEEN low AND high'.
7458 It is handled similar to the equivalent condition
7459 'a >= low AND a <= high':
7460 */
7461 if (cond_func->functype() == Item_func::BETWEEN)
7462 {
7463 Item_field *field_item;
7464 bool equal_func= FALSE;
7465 uint num_values= 2;
7466 values= cond_func->arguments();
7467
7468 bool binary_cmp= (values[0]->real_item()->type() == Item::FIELD_ITEM)
7469 ? ((Item_field*)values[0]->real_item())->field->binary()
7470 : TRUE;
7471
7472 /*
7473 Additional optimization: If 'low = high':
7474 Handle as if the condition was "t.key = low".
7475 */
7476 if (!((Item_func_between*)cond_func)->negated &&
7477 values[1]->eq(values[2], binary_cmp))
7478 {
7479 equal_func= TRUE;
7480 num_values= 1;
7481 }
7482
7483 /*
7484 Append keys for 'field <cmp> value[]' if the
7485 condition is of the form::
7486 '<field> BETWEEN value[1] AND value[2]'
7487 */
7488 if (is_local_field (values[0]))
7489 {
7490 field_item= (Item_field *) (values[0]->real_item());
7491 add_key_equal_fields(key_fields, *and_level, cond_func,
7492 field_item, equal_func, &values[1],
7493 num_values, usable_tables, sargables);
7494 }
7495 /*
7496 Append keys for 'value[0] <cmp> field' if the
7497 condition is of the form:
7498 'value[0] BETWEEN field1 AND field2'
7499 */
7500 for (uint i= 1; i <= num_values; i++)
7501 {
7502 if (is_local_field (values[i]))
7503 {
7504 field_item= (Item_field *) (values[i]->real_item());
7505 add_key_equal_fields(key_fields, *and_level, cond_func,
7506 field_item, equal_func, values,
7507 1, usable_tables, sargables);
7508 }
7509 }
7510 } // if ( ... Item_func::BETWEEN)
7511
7512 // The predicate is IN or !=
7513 else if (is_local_field (cond_func->key_item()) &&
7514 !(cond_func->used_tables() & OUTER_REF_TABLE_BIT))
7515 {
7516 values= cond_func->arguments()+1;
7517 if (cond_func->functype() == Item_func::NE_FUNC &&
7518 is_local_field (cond_func->arguments()[1]))
7519 values--;
7520 assert(cond_func->functype() != Item_func::IN_FUNC ||
7521 cond_func->argument_count() != 2);
7522 add_key_equal_fields(key_fields, *and_level, cond_func,
7523 (Item_field*) (cond_func->key_item()->real_item()),
7524 0, values,
7525 cond_func->argument_count()-1,
7526 usable_tables, sargables);
7527 }
7528 else if (cond_func->functype() == Item_func::IN_FUNC &&
7529 cond_func->key_item()->type() == Item::ROW_ITEM)
7530 {
7531 /*
7532 The condition is (column1, column2, ... ) IN ((const1_1, const1_2), ...)
7533 and there is an index on (column1, column2, ...)
7534
7535 The code below makes sure that the row constructor on the lhs indeed
7536 contains only column references before calling add_key_field on them.
7537
7538 We can't do a ref access on IN, yet here we are. Why? We need
7539 to run add_key_field() only because it verifies that there are
7540 only constant expressions in the rows on the IN's rhs, see
7541 comment above the call to add_key_field() below.
7542
7543 Actually, We could in theory do a ref access if the IN rhs
7544 contained just a single row, but there is a hack in the parser
7545 causing such IN predicates be parsed as row equalities.
7546 */
7547 Item_row *lhs_row= static_cast<Item_row*>(cond_func->key_item());
7548 if (is_row_of_local_columns(lhs_row))
7549 {
7550 for (uint i= 0; i < lhs_row->cols(); ++i)
7551 {
7552 Item *const lhs_item= lhs_row->element_index(i)->real_item();
7553 assert(lhs_item->type() == Item::FIELD_ITEM);
7554 Item_field *const lhs_column= static_cast<Item_field*>(lhs_item);
7555 // j goes from 1 since arguments()[0] is the lhs of IN.
7556 for (uint j= 1; j < cond_func->argument_count(); ++j)
7557 {
7558 // Here we pick out the i:th column in the j:th row.
7559 Item *rhs_item= cond_func->arguments()[j];
7560 assert(rhs_item->type() == Item::ROW_ITEM);
7561 Item_row *rhs_row= static_cast<Item_row*>(rhs_item);
7562 assert(rhs_row->cols() == lhs_row->cols());
7563 Item **rhs_expr_ptr= rhs_row->addr(i);
7564 /*
7565 add_key_field() will write a Key_field on each call
7566 here, but we don't care, it will never be used. We only
7567 call it for the side effect: update JOIN_TAB::const_keys
7568 so the range optimizer can be invoked. We pass a
7569 scrap buffer and pointer here.
7570 */
7571 Key_field scrap_key_field= **key_fields;
7572 Key_field *scrap_key_field_ptr= &scrap_key_field;
7573 add_key_field(&scrap_key_field_ptr,
7574 *and_level,
7575 cond_func,
7576 lhs_column,
7577 true, // eq_func
7578 rhs_expr_ptr,
7579 1, // Number of expressions: one
7580 usable_tables,
7581 NULL); // sargables
7582 // The pointer is not supposed to increase by more than one.
7583 assert(scrap_key_field_ptr <= &scrap_key_field + 1);
7584 }
7585 }
7586 }
7587 }
7588 break;
7589 }
7590 case Item_func::OPTIMIZE_OP:
7591 {
7592 bool equal_func=(cond_func->functype() == Item_func::EQ_FUNC ||
7593 cond_func->functype() == Item_func::EQUAL_FUNC);
7594
7595 if (is_local_field (cond_func->arguments()[0]))
7596 {
7597 add_key_equal_fields(key_fields, *and_level, cond_func,
7598 (Item_field*) (cond_func->arguments()[0])->real_item(),
7599 equal_func,
7600 cond_func->arguments()+1, 1, usable_tables,
7601 sargables);
7602 }
7603 if (is_local_field (cond_func->arguments()[1]) &&
7604 cond_func->functype() != Item_func::LIKE_FUNC)
7605 {
7606 add_key_equal_fields(key_fields, *and_level, cond_func,
7607 (Item_field*) (cond_func->arguments()[1])->real_item(),
7608 equal_func,
7609 cond_func->arguments(),1,usable_tables,
7610 sargables);
7611 }
7612 break;
7613 }
7614 case Item_func::OPTIMIZE_NULL:
7615 /* column_name IS [NOT] NULL */
7616 if (is_local_field (cond_func->arguments()[0]) &&
7617 !(cond_func->used_tables() & OUTER_REF_TABLE_BIT))
7618 {
7619 Item *tmp=new Item_null;
7620 if (unlikely(!tmp)) // Should never be true
7621 DBUG_VOID_RETURN;
7622 add_key_equal_fields(key_fields, *and_level, cond_func,
7623 (Item_field*) (cond_func->arguments()[0])->real_item(),
7624 cond_func->functype() == Item_func::ISNULL_FUNC,
7625 &tmp, 1, usable_tables, sargables);
7626 }
7627 break;
7628 case Item_func::OPTIMIZE_EQUAL:
7629 Item_equal *item_equal= (Item_equal *) cond;
7630 Item *const_item= item_equal->get_const();
7631 if (const_item)
7632 {
7633 /*
7634 For each field field1 from item_equal consider the equality
7635 field1=const_item as a condition allowing an index access of the table
7636 with field1 by the keys value of field1.
7637 */
7638 Item_equal_iterator it(*item_equal);
7639 Item_field *item;
7640 while ((item= it++))
7641 {
7642 add_key_field(key_fields, *and_level, cond_func, item,
7643 TRUE, &const_item, 1, usable_tables, sargables);
7644 }
7645 }
7646 else
7647 {
7648 /*
7649 Consider all pairs of different fields included into item_equal.
7650 For each of them (field1, field1) consider the equality
7651 field1=field2 as a condition allowing an index access of the table
7652 with field1 by the keys value of field2.
7653 */
7654 Item_equal_iterator outer_it(*item_equal);
7655 Item_equal_iterator inner_it(*item_equal);
7656 Item_field *outer;
7657 while ((outer= outer_it++))
7658 {
7659 Item_field *inner;
7660 while ((inner= inner_it++))
7661 {
7662 if (!outer->field->eq(inner->field))
7663 add_key_field(key_fields, *and_level, cond_func, outer,
7664 true, (Item **) &inner, 1, usable_tables,
7665 sargables);
7666 }
7667 inner_it.rewind();
7668 }
7669 }
7670 break;
7671 }
7672 DBUG_VOID_RETURN;
7673 }
7674
7675
7676 /*
7677 Add all keys with uses 'field' for some keypart
7678 If field->and_level != and_level then only mark key_part as const_part
7679
7680 RETURN
7681 0 - OK
7682 1 - Out of memory.
7683 */
7684
7685 static bool
add_key_part(Key_use_array * keyuse_array,Key_field * key_field)7686 add_key_part(Key_use_array *keyuse_array, Key_field *key_field)
7687 {
7688 if (key_field->eq_func && !(key_field->optimize & KEY_OPTIMIZE_EXISTS))
7689 {
7690 Field *const field= key_field->item_field->field;
7691 TABLE_LIST *const tl= key_field->item_field->table_ref;
7692 TABLE *const table= tl->table;
7693
7694 for (uint key=0 ; key < table->s->keys ; key++)
7695 {
7696 if (!(table->keys_in_use_for_query.is_set(key)))
7697 continue;
7698 if (table->key_info[key].flags & (HA_FULLTEXT | HA_SPATIAL))
7699 continue; // ToDo: ft-keys in non-ft queries. SerG
7700
7701 uint key_parts= actual_key_parts(&table->key_info[key]);
7702 for (uint part=0 ; part < key_parts ; part++)
7703 {
7704 if (field->eq(table->key_info[key].key_part[part].field))
7705 {
7706 const Key_use keyuse(tl,
7707 key_field->val,
7708 key_field->val->used_tables(),
7709 key,
7710 part,
7711 key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL,
7712 (key_part_map) 1 << part,
7713 ~(ha_rows) 0, // will be set in optimize_keyuse
7714 key_field->null_rejecting,
7715 key_field->cond_guard,
7716 key_field->sj_pred_no);
7717 if (keyuse_array->push_back(keyuse))
7718 return true; /* purecov: inspected */
7719 }
7720 }
7721 }
7722 }
7723 return false;
7724 }
7725
7726
7727 /**
7728 Function parses WHERE condition and add key_use for FT index
7729 into key_use array if suitable MATCH function is found.
7730 Condition should be a set of AND expression, OR is not supported.
7731 MATCH function should be a part of simple expression.
7732 Simple expression is MATCH only function or MATCH is a part of
7733 comparison expression ('>=' or '>' operations are supported).
7734 It also sets FT_HINTS values(op_type, op_value).
7735
7736 @param keyuse_array Key_use array
7737 @param stat JOIN_TAB structure
7738 @param cond WHERE condition
7739 @param usable_tables usable tables
7740 @param simple_match_expr true if this is the first call false otherwise.
7741 if MATCH function is found at first call it means
7742 that MATCH is simple expression, otherwise, in case
7743 of AND/OR condition this parameter will be false.
7744
7745 @retval
7746 true if FT key was added to Key_use array
7747 @retval
7748 false if no key was added to Key_use array
7749
7750 */
7751
7752 static bool
add_ft_keys(Key_use_array * keyuse_array,JOIN_TAB * stat,Item * cond,table_map usable_tables,bool simple_match_expr)7753 add_ft_keys(Key_use_array *keyuse_array,
7754 JOIN_TAB *stat,Item *cond,table_map usable_tables,
7755 bool simple_match_expr)
7756 {
7757 Item_func_match *cond_func=NULL;
7758
7759 if (!cond)
7760 return FALSE;
7761
7762 if (cond->type() == Item::FUNC_ITEM)
7763 {
7764 Item_func *func=(Item_func *)cond;
7765 Item_func::Functype functype= func->functype();
7766 enum ft_operation op_type= FT_OP_NO;
7767 double op_value= 0.0;
7768 if (functype == Item_func::FT_FUNC)
7769 {
7770 cond_func= ((Item_func_match *) cond)->get_master();
7771 cond_func->set_hints_op(op_type, op_value);
7772 }
7773 else if (func->arg_count == 2)
7774 {
7775 Item *arg0=(func->arguments()[0]),
7776 *arg1=(func->arguments()[1]);
7777 if (arg1->const_item() &&
7778 arg0->type() == Item::FUNC_ITEM &&
7779 ((Item_func *) arg0)->functype() == Item_func::FT_FUNC &&
7780 ((functype == Item_func::GE_FUNC &&
7781 (op_value= arg1->val_real()) > 0) ||
7782 (functype == Item_func::GT_FUNC &&
7783 (op_value= arg1->val_real()) >=0)))
7784 {
7785 cond_func= ((Item_func_match *) arg0)->get_master();
7786 if (functype == Item_func::GE_FUNC)
7787 op_type= FT_OP_GE;
7788 else if (functype == Item_func::GT_FUNC)
7789 op_type= FT_OP_GT;
7790 cond_func->set_hints_op(op_type, op_value);
7791 }
7792 else if (arg0->const_item() &&
7793 arg1->type() == Item::FUNC_ITEM &&
7794 ((Item_func *) arg1)->functype() == Item_func::FT_FUNC &&
7795 ((functype == Item_func::LE_FUNC &&
7796 (op_value= arg0->val_real()) > 0) ||
7797 (functype == Item_func::LT_FUNC &&
7798 (op_value= arg0->val_real()) >=0)))
7799 {
7800 cond_func= ((Item_func_match *) arg1)->get_master();
7801 if (functype == Item_func::LE_FUNC)
7802 op_type= FT_OP_GE;
7803 else if (functype == Item_func::LT_FUNC)
7804 op_type= FT_OP_GT;
7805 cond_func->set_hints_op(op_type, op_value);
7806 }
7807 }
7808 }
7809 else if (cond->type() == Item::COND_ITEM)
7810 {
7811 List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
7812
7813 if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
7814 {
7815 Item *item;
7816 while ((item=li++))
7817 {
7818 if (add_ft_keys(keyuse_array, stat, item, usable_tables, false))
7819 return TRUE;
7820 }
7821 }
7822 }
7823
7824 if (!cond_func || cond_func->key == NO_SUCH_KEY ||
7825 !(usable_tables & cond_func->table_ref->map()))
7826 return FALSE;
7827
7828 cond_func->set_simple_expression(simple_match_expr);
7829
7830 const Key_use keyuse(cond_func->table_ref,
7831 cond_func,
7832 cond_func->key_item()->used_tables(),
7833 cond_func->key,
7834 FT_KEYPART,
7835 0, // optimize
7836 0, // keypart_map
7837 ~(ha_rows)0, // ref_table_rows
7838 false, // null_rejecting
7839 NULL, // cond_guard
7840 UINT_MAX); // sj_pred_no
7841 return keyuse_array->push_back(keyuse);
7842 }
7843
7844
7845 /**
7846 Compares two keyuse elements.
7847
7848 @param a first Key_use element
7849 @param b second Key_use element
7850
7851 Compare Key_use elements so that they are sorted as follows:
7852 -# By table.
7853 -# By key for each table.
7854 -# By keypart for each key.
7855 -# Const values.
7856 -# Ref_or_null.
7857
7858 @retval 0 If a = b.
7859 @retval <0 If a < b.
7860 @retval >0 If a > b.
7861 */
sort_keyuse(Key_use * a,Key_use * b)7862 static int sort_keyuse(Key_use *a, Key_use *b)
7863 {
7864 int res;
7865 if (a->table_ref->tableno() != b->table_ref->tableno())
7866 return (int) (a->table_ref->tableno() - b->table_ref->tableno());
7867 if (a->key != b->key)
7868 return (int) (a->key - b->key);
7869 if (a->keypart != b->keypart)
7870 return (int) (a->keypart - b->keypart);
7871 // Place const values before other ones
7872 if ((res= MY_TEST((a->used_tables & ~OUTER_REF_TABLE_BIT)) -
7873 MY_TEST((b->used_tables & ~OUTER_REF_TABLE_BIT))))
7874 return res;
7875 /* Place rows that are not 'OPTIMIZE_REF_OR_NULL' first */
7876 return (int) ((a->optimize & KEY_OPTIMIZE_REF_OR_NULL) -
7877 (b->optimize & KEY_OPTIMIZE_REF_OR_NULL));
7878 }
7879
7880
7881 /*
7882 Add to Key_field array all 'ref' access candidates within nested join.
7883
7884 This function populates Key_field array with entries generated from the
7885 ON condition of the given nested join, and does the same for nested joins
7886 contained within this nested join.
7887
7888 @param[in] nested_join_table Nested join pseudo-table to process
7889 @param[in,out] end End of the key field array
7890 @param[in,out] and_level And-level
7891 @param[in,out] sargables Array of found sargable candidates
7892
7893
7894 @note
7895 We can add accesses to the tables that are direct children of this nested
7896 join (1), and are not inner tables w.r.t their neighbours (2).
7897
7898 Example for #1 (outer brackets pair denotes nested join this function is
7899 invoked for):
7900 @code
7901 ... LEFT JOIN (t1 LEFT JOIN (t2 ... ) ) ON cond
7902 @endcode
7903 Example for #2:
7904 @code
7905 ... LEFT JOIN (t1 LEFT JOIN t2 ) ON cond
7906 @endcode
7907 In examples 1-2 for condition cond, we can add 'ref' access candidates to
7908 t1 only.
7909 Example #3:
7910 @code
7911 ... LEFT JOIN (t1, t2 LEFT JOIN t3 ON inner_cond) ON cond
7912 @endcode
7913 Here we can add 'ref' access candidates for t1 and t2, but not for t3.
7914 */
7915
add_key_fields_for_nj(JOIN * join,TABLE_LIST * nested_join_table,Key_field ** end,uint * and_level,SARGABLE_PARAM ** sargables)7916 static void add_key_fields_for_nj(JOIN *join, TABLE_LIST *nested_join_table,
7917 Key_field **end, uint *and_level,
7918 SARGABLE_PARAM **sargables)
7919 {
7920 List_iterator<TABLE_LIST> li(nested_join_table->nested_join->join_list);
7921 List_iterator<TABLE_LIST> li2(nested_join_table->nested_join->join_list);
7922 bool have_another = FALSE;
7923 table_map tables= 0;
7924 TABLE_LIST *table;
7925 assert(nested_join_table->nested_join);
7926
7927 while ((table= li++) || (have_another && (li=li2, have_another=FALSE,
7928 (table= li++))))
7929 {
7930 if (table->nested_join)
7931 {
7932 if (!table->join_cond_optim())
7933 {
7934 /* It's a semi-join nest. Walk into it as if it wasn't a nest */
7935 have_another= TRUE;
7936 li2= li;
7937 li= List_iterator<TABLE_LIST>(table->nested_join->join_list);
7938 }
7939 else
7940 add_key_fields_for_nj(join, table, end, and_level, sargables);
7941 }
7942 else
7943 if (!table->join_cond_optim())
7944 tables|= table->map();
7945 }
7946 if (nested_join_table->join_cond_optim())
7947 add_key_fields(join, end, and_level, nested_join_table->join_cond_optim(),
7948 tables, sargables);
7949 }
7950
7951
7952 /// @} (end of group RefOptimizerModule)
7953
7954
7955 /**
7956 Check for the presence of AGGFN(DISTINCT a) queries that may be subject
7957 to loose index scan.
7958
7959
7960 Check if the query is a subject to AGGFN(DISTINCT) using loose index scan
7961 (QUICK_GROUP_MIN_MAX_SELECT).
7962 Optionally (if out_args is supplied) will push the arguments of
7963 AGGFN(DISTINCT) to the list
7964
7965 Check for every COUNT(DISTINCT), AVG(DISTINCT) or
7966 SUM(DISTINCT). These can be resolved by Loose Index Scan as long
7967 as all the aggregate distinct functions refer to the same
7968 fields. Thus:
7969
7970 SELECT AGGFN(DISTINCT a, b), AGGFN(DISTINCT b, a)... => can use LIS
7971 SELECT AGGFN(DISTINCT a), AGGFN(DISTINCT a) ... => can use LIS
7972 SELECT AGGFN(DISTINCT a, b), AGGFN(DISTINCT a) ... => cannot use LIS
7973 SELECT AGGFN(DISTINCT a), AGGFN(DISTINCT b) ... => cannot use LIS
7974 etc.
7975
7976 @param join the join to check
7977 @param[out] out_args Collect the arguments of the aggregate functions
7978 to a list. We don't worry about duplicates as
7979 these will be sorted out later in
7980 get_best_group_min_max.
7981
7982 @return does the query qualify for indexed AGGFN(DISTINCT)
7983 @retval true it does
7984 @retval false AGGFN(DISTINCT) must apply distinct in it.
7985 */
7986
7987 bool
is_indexed_agg_distinct(JOIN * join,List<Item_field> * out_args)7988 is_indexed_agg_distinct(JOIN *join, List<Item_field> *out_args)
7989 {
7990 Item_sum **sum_item_ptr;
7991 bool result= false;
7992 Field_map first_aggdistinct_fields;
7993
7994 if (join->primary_tables > 1 || /* reference more than 1 table */
7995 join->select_distinct || /* or a DISTINCT */
7996 join->select_lex->olap == ROLLUP_TYPE) /* Check (B3) for ROLLUP */
7997 return false;
7998
7999 if (join->make_sum_func_list(join->all_fields, join->fields_list, true))
8000 return false;
8001
8002 for (sum_item_ptr= join->sum_funcs; *sum_item_ptr; sum_item_ptr++)
8003 {
8004 Item_sum *sum_item= *sum_item_ptr;
8005 Field_map cur_aggdistinct_fields;
8006 Item *expr;
8007 /* aggregate is not AGGFN(DISTINCT) or more than 1 argument to it */
8008 switch (sum_item->sum_func())
8009 {
8010 case Item_sum::MIN_FUNC:
8011 case Item_sum::MAX_FUNC:
8012 continue;
8013 case Item_sum::COUNT_DISTINCT_FUNC:
8014 break;
8015 case Item_sum::AVG_DISTINCT_FUNC:
8016 case Item_sum::SUM_DISTINCT_FUNC:
8017 if (sum_item->get_arg_count() == 1)
8018 break;
8019 /* fall through */
8020 default: return false;
8021 }
8022
8023 for (uint i= 0; i < sum_item->get_arg_count(); i++)
8024 {
8025 expr= sum_item->get_arg(i);
8026 /* The AGGFN(DISTINCT) arg is not an attribute? */
8027 if (expr->real_item()->type() != Item::FIELD_ITEM)
8028 return false;
8029
8030 Item_field* item= static_cast<Item_field*>(expr->real_item());
8031 if (out_args)
8032 out_args->push_back(item);
8033
8034 cur_aggdistinct_fields.set_bit(item->field->field_index);
8035 result= true;
8036 }
8037 /*
8038 If there are multiple aggregate functions, make sure that they all
8039 refer to exactly the same set of columns.
8040 */
8041 if (first_aggdistinct_fields.is_clear_all())
8042 first_aggdistinct_fields.merge(cur_aggdistinct_fields);
8043 else if (first_aggdistinct_fields != cur_aggdistinct_fields)
8044 return false;
8045 }
8046
8047 return result;
8048 }
8049
8050
8051 /**
8052 Print keys that were appended to join_tab->const_keys because they
8053 can be used for GROUP BY or DISTINCT to the optimizer trace.
8054
8055 @param trace The optimizer trace context we're adding info to
8056 @param join_tab The table the indexes cover
8057 @param new_keys The keys that are considered useful because they can
8058 be used for GROUP BY or DISTINCT
8059 @param cause Zero-terminated string with reason for adding indexes
8060 to const_keys
8061
8062 @see add_group_and_distinct_keys()
8063 */
trace_indexes_added_group_distinct(Opt_trace_context * trace,const JOIN_TAB * join_tab,const key_map new_keys,const char * cause)8064 static void trace_indexes_added_group_distinct(Opt_trace_context *trace,
8065 const JOIN_TAB *join_tab,
8066 const key_map new_keys,
8067 const char* cause)
8068 {
8069 #ifdef OPTIMIZER_TRACE
8070 if (likely(!trace->is_started()))
8071 return;
8072
8073 KEY *key_info= join_tab->table()->key_info;
8074 key_map existing_keys= join_tab->const_keys;
8075 uint nbrkeys= join_tab->table()->s->keys;
8076
8077 Opt_trace_object trace_summary(trace, "const_keys_added");
8078 {
8079 Opt_trace_array trace_key(trace,"keys");
8080 for (uint j= 0 ; j < nbrkeys ; j++)
8081 if (new_keys.is_set(j) && !existing_keys.is_set(j))
8082 trace_key.add_utf8(key_info[j].name);
8083 }
8084 trace_summary.add_alnum("cause", cause);
8085 #endif
8086 }
8087
8088
8089 /**
8090 Discover the indexes that might be used for GROUP BY or DISTINCT queries.
8091
8092 If the query has a GROUP BY clause, find all indexes that contain
8093 all GROUP BY fields, and add those indexes to join_tab->const_keys
8094 and join_tab->keys.
8095
8096 If the query has a DISTINCT clause, find all indexes that contain
8097 all SELECT fields, and add those indexes to join_tab->const_keys and
8098 join_tab->keys. This allows later on such queries to be processed by
8099 a QUICK_GROUP_MIN_MAX_SELECT.
8100
8101 Note that indexes that are not usable for resolving GROUP
8102 BY/DISTINCT may also be added in some corner cases. For example, an
8103 index covering 'a' and 'b' is not usable for the following query but
8104 is still added: "SELECT DISTINCT a+b FROM t1". This is not a big
8105 issue because a) although the optimizer will consider using the
8106 index, it will not chose it (so minor calculation cost added but not
8107 wrong result) and b) it applies only to corner cases.
8108
8109 @param join
8110 @param join_tab
8111
8112 @return
8113 None
8114 */
8115
8116 static void
add_group_and_distinct_keys(JOIN * join,JOIN_TAB * join_tab)8117 add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab)
8118 {
8119 assert(join_tab->const_keys.is_subset(join_tab->keys()));
8120
8121 List<Item_field> indexed_fields;
8122 List_iterator<Item_field> indexed_fields_it(indexed_fields);
8123 ORDER *cur_group;
8124 Item_field *cur_item;
8125 const char *cause;
8126
8127 if (join->group_list)
8128 { /* Collect all query fields referenced in the GROUP clause. */
8129 for (cur_group= join->group_list; cur_group; cur_group= cur_group->next)
8130 (*cur_group->item)->walk(&Item::collect_item_field_processor,
8131 Item::WALK_POSTFIX,
8132 (uchar*) &indexed_fields);
8133 cause= "group_by";
8134 }
8135 else if (join->select_distinct)
8136 { /* Collect all query fields referenced in the SELECT clause. */
8137 List<Item> &select_items= join->fields_list;
8138 List_iterator<Item> select_items_it(select_items);
8139 Item *item;
8140 while ((item= select_items_it++))
8141 item->walk(&Item::collect_item_field_processor,
8142 Item::WALK_POSTFIX,
8143 (uchar*) &indexed_fields);
8144 cause= "distinct";
8145 }
8146 else if (join->tmp_table_param.sum_func_count &&
8147 is_indexed_agg_distinct(join, &indexed_fields))
8148 {
8149 /*
8150 SELECT list with AGGFN(distinct col). The query qualifies for
8151 loose index scan, and is_indexed_agg_distinct() has already
8152 collected all referenced fields into indexed_fields.
8153 */
8154 join->sort_and_group= 1;
8155 cause= "indexed_distinct_aggregate";
8156 }
8157 else
8158 return;
8159
8160 if (indexed_fields.elements == 0)
8161 return;
8162
8163 key_map possible_keys;
8164 possible_keys.set_all();
8165
8166 /* Intersect the keys of all group fields. */
8167 while ((cur_item= indexed_fields_it++))
8168 {
8169 if (cur_item->used_tables() != join_tab->table_ref->map())
8170 {
8171 /*
8172 Doing GROUP BY or DISTINCT on a field in another table so no
8173 index in this table is usable
8174 */
8175 return;
8176 }
8177 else
8178 possible_keys.intersect(cur_item->field->part_of_key);
8179 }
8180
8181 /*
8182 At this point, possible_keys has key bits set only for usable
8183 indexes because indexed_fields is non-empty and if any of the
8184 fields belong to a different table the function would exit in the
8185 loop above.
8186 */
8187
8188 if (!possible_keys.is_clear_all() &&
8189 !possible_keys.is_subset(join_tab->const_keys))
8190 {
8191 trace_indexes_added_group_distinct(&join->thd->opt_trace, join_tab,
8192 possible_keys, cause);
8193 join_tab->const_keys.merge(possible_keys);
8194 join_tab->keys().merge(possible_keys);
8195 }
8196
8197 assert(join_tab->const_keys.is_subset(join_tab->keys()));
8198 }
8199
8200 /**
8201 Update keyuse array with all possible keys we can use to fetch rows.
8202
8203 @param thd
8204 @param[out] keyuse Put here ordered array of Key_use structures
8205 @param join_tab Array in table number order
8206 @param tables Number of tables in join
8207 @param cond WHERE condition (note that the function analyzes
8208 join_tab[i]->join_cond() too)
8209 @param normal_tables Tables not inner w.r.t some outer join (ones
8210 for which we can make ref access based the WHERE
8211 clause)
8212 @param select_lex current SELECT
8213 @param[out] sargables Array of found sargable candidates
8214
8215 @retval
8216 0 OK
8217 @retval
8218 1 Out of memory.
8219 */
8220
8221 static bool
update_ref_and_keys(THD * thd,Key_use_array * keyuse,JOIN_TAB * join_tab,uint tables,Item * cond,COND_EQUAL * cond_equal,table_map normal_tables,SELECT_LEX * select_lex,SARGABLE_PARAM ** sargables)8222 update_ref_and_keys(THD *thd, Key_use_array *keyuse,JOIN_TAB *join_tab,
8223 uint tables, Item *cond, COND_EQUAL *cond_equal,
8224 table_map normal_tables, SELECT_LEX *select_lex,
8225 SARGABLE_PARAM **sargables)
8226 {
8227 uint and_level,i,found_eq_constant;
8228 Key_field *key_fields, *end, *field;
8229 size_t sz;
8230 uint m= max(select_lex->max_equal_elems, 1U);
8231 JOIN *const join= select_lex->join;
8232 /*
8233 We use the same piece of memory to store both Key_field
8234 and SARGABLE_PARAM structure.
8235 Key_field values are placed at the beginning this memory
8236 while SARGABLE_PARAM values are put at the end.
8237 All predicates that are used to fill arrays of Key_field
8238 and SARGABLE_PARAM structures have at most 2 arguments
8239 except BETWEEN predicates that have 3 arguments and
8240 IN predicates.
8241 This any predicate if it's not BETWEEN/IN can be used
8242 directly to fill at most 2 array elements, either of Key_field
8243 or SARGABLE_PARAM type. For a BETWEEN predicate 3 elements
8244 can be filled as this predicate is considered as
8245 saragable with respect to each of its argument.
8246 An IN predicate can require at most 1 element as currently
8247 it is considered as sargable only for its first argument.
8248 Multiple equality can add elements that are filled after
8249 substitution of field arguments by equal fields. There
8250 can be not more than select_lex->max_equal_elems such
8251 substitutions.
8252 */
8253 sz= max(sizeof(Key_field), sizeof(SARGABLE_PARAM)) *
8254 (((select_lex->cond_count + 1) * 2 +
8255 select_lex->between_count) * m + 1);
8256 if (!(key_fields=(Key_field*) thd->alloc(sz)))
8257 return TRUE; /* purecov: inspected */
8258 and_level= 0;
8259 field= end= key_fields;
8260 *sargables= (SARGABLE_PARAM *) key_fields +
8261 (sz - sizeof((*sargables)[0].field))/sizeof(SARGABLE_PARAM);
8262 /* set a barrier for the array of SARGABLE_PARAM */
8263 (*sargables)[0].field= 0;
8264
8265 if (cond)
8266 {
8267 add_key_fields(join, &end, &and_level, cond, normal_tables, sargables);
8268 for (Key_field *fld= field; fld != end ; fld++)
8269 {
8270 /* Mark that we can optimize LEFT JOIN */
8271 if (fld->val->type() == Item::NULL_ITEM &&
8272 !fld->item_field->field->real_maybe_null())
8273 {
8274 /*
8275 Example:
8276 SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.a WHERE t2.a IS NULL;
8277 this just wants rows of t1 where t1.a does not exist in t2.
8278 */
8279 fld->item_field->field->table->reginfo.not_exists_optimize= true;
8280 }
8281 }
8282 }
8283
8284 for (i=0 ; i < tables ; i++)
8285 {
8286 /*
8287 Block the creation of keys for inner tables of outer joins.
8288 Here only the outer joins that can not be converted to
8289 inner joins are left and all nests that can be eliminated
8290 are flattened.
8291 In the future when we introduce conditional accesses
8292 for inner tables in outer joins these keys will be taken
8293 into account as well.
8294 */
8295 if (join_tab[i].join_cond())
8296 add_key_fields(join, &end, &and_level,
8297 join_tab[i].join_cond(),
8298 join_tab[i].table_ref->map(), sargables);
8299 }
8300
8301 /* Process ON conditions for the nested joins */
8302 {
8303 List_iterator<TABLE_LIST> li(select_lex->top_join_list);
8304 TABLE_LIST *tl;
8305 while ((tl= li++))
8306 {
8307 if (tl->nested_join)
8308 add_key_fields_for_nj(join, tl, &end, &and_level, sargables);
8309 }
8310 }
8311
8312 /* Generate keys descriptions for derived tables */
8313 if (select_lex->materialized_derived_table_count)
8314 {
8315 if (join->generate_derived_keys())
8316 return true;
8317 }
8318 /* fill keyuse with found key parts */
8319 for ( ; field != end ; field++)
8320 {
8321 if (add_key_part(keyuse,field))
8322 return true;
8323 }
8324
8325 if (select_lex->ftfunc_list->elements)
8326 {
8327 if (add_ft_keys(keyuse, join_tab, cond, normal_tables, true))
8328 return true;
8329 }
8330
8331 /*
8332 Sort the array of possible keys and remove the following key parts:
8333 - ref if there is a keypart which is a ref and a const.
8334 (e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d,
8335 then we skip the key part corresponding to b=t2.d)
8336 - keyparts without previous keyparts
8337 (e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is
8338 used in the query, we drop the partial key parts from consideration).
8339 Special treatment for ft-keys.
8340 */
8341 if (!keyuse->empty())
8342 {
8343 Key_use *save_pos, *use;
8344
8345 my_qsort(keyuse->begin(), keyuse->size(), keyuse->element_size(),
8346 reinterpret_cast<qsort_cmp>(sort_keyuse));
8347
8348 const Key_use key_end(NULL, NULL, 0, 0, 0, 0, 0, 0, false, NULL, 0);
8349 if (keyuse->push_back(key_end)) // added for easy testing
8350 return TRUE;
8351
8352 use= save_pos= keyuse->begin();
8353 const Key_use *prev= &key_end;
8354 found_eq_constant=0;
8355 for (i=0 ; i < keyuse->size()-1 ; i++,use++)
8356 {
8357 TABLE *const table= use->table_ref->table;
8358 if (!use->used_tables && use->optimize != KEY_OPTIMIZE_REF_OR_NULL)
8359 table->const_key_parts[use->key]|= use->keypart_map;
8360 if (use->keypart != FT_KEYPART)
8361 {
8362 if (use->key == prev->key && use->table_ref == prev->table_ref)
8363 {
8364 if (prev->keypart+1 < use->keypart ||
8365 (prev->keypart == use->keypart && found_eq_constant))
8366 continue; /* remove */
8367 }
8368 else if (use->keypart != 0) // First found must be 0
8369 continue;
8370 }
8371
8372 #if defined(__GNUC__) && !MY_GNUC_PREREQ(4,4)
8373 /*
8374 Old gcc used a memcpy(), which is undefined if save_pos==use:
8375 http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19410
8376 http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39480
8377 */
8378 if (save_pos != use)
8379 #endif
8380 *save_pos= *use;
8381 prev=use;
8382 found_eq_constant= !use->used_tables;
8383 /* Save ptr to first use */
8384 if (!table->reginfo.join_tab->keyuse())
8385 table->reginfo.join_tab->set_keyuse(save_pos);
8386 table->reginfo.join_tab->checked_keys.set_bit(use->key);
8387 save_pos++;
8388 }
8389 i= (uint) (save_pos - keyuse->begin());
8390 keyuse->at(i) = key_end;
8391 keyuse->chop(i);
8392 }
8393 print_keyuse_array(&thd->opt_trace, keyuse);
8394
8395 return false;
8396 }
8397
8398
8399 /**
8400 Create a keyuse array for a table with a primary key.
8401 To be used when creating a materialized temporary table.
8402
8403 @param thd THD pointer, for memory allocation
8404 @param table Table object representing table
8405 @param keyparts Number of key parts in the primary key
8406 @param outer_exprs List of items used for key lookup
8407
8408 @return Pointer to created keyuse array, or NULL if error
8409 */
create_keyuse_for_table(THD * thd,TABLE * table,uint keyparts,Item_field ** fields,List<Item> outer_exprs)8410 Key_use_array *create_keyuse_for_table(THD *thd, TABLE *table, uint keyparts,
8411 Item_field **fields,
8412 List<Item> outer_exprs)
8413 {
8414 void *mem= thd->alloc(sizeof(Key_use_array));
8415 if (!mem)
8416 return NULL;
8417 Key_use_array *keyuses= new (mem) Key_use_array(thd->mem_root);
8418
8419 List_iterator<Item> outer_expr(outer_exprs);
8420
8421 for (uint keypartno= 0; keypartno < keyparts; keypartno++)
8422 {
8423 Item *const item= outer_expr++;
8424 Key_field key_field(fields[keypartno], item, 0, 0, true,
8425 // null_rejecting must be true for field items only,
8426 // add_not_null_conds() is incapable of handling
8427 // other item types.
8428 (item->type() == Item::FIELD_ITEM),
8429 NULL, UINT_MAX);
8430 if (add_key_part(keyuses, &key_field))
8431 return NULL;
8432 }
8433 const Key_use key_end(NULL, NULL, 0, 0, 0, 0, 0, 0, false, NULL, 0);
8434 if (keyuses->push_back(key_end)) // added for easy testing
8435 return NULL;
8436
8437 return keyuses;
8438 }
8439
8440
8441 /**
8442 Move const tables first in the position array.
8443
8444 Increment the number of const tables and set same basic properties for the
8445 const table.
8446 A const table looked up by a key has type JT_CONST.
8447 A const table with a single row has type JT_SYSTEM.
8448
8449 @param tab Table that is designated as a const table
8450 @param key The key definition to use for this table (NULL if table scan)
8451 */
8452
mark_const_table(JOIN_TAB * tab,Key_use * key)8453 void JOIN::mark_const_table(JOIN_TAB *tab, Key_use *key)
8454 {
8455 POSITION *const position= positions + const_tables;
8456 position->table= tab;
8457 position->key= key;
8458 position->rows_fetched= 1.0; // This is a const table
8459 position->filter_effect= 1.0;
8460 position->prefix_rowcount= 1.0;
8461 position->read_cost= 0.0;
8462 position->ref_depend_map= 0;
8463 position->loosescan_key= MAX_KEY; // Not a LooseScan
8464 position->sj_strategy= SJ_OPT_NONE;
8465 positions->use_join_buffer= false;
8466
8467 // Move the const table as far down as possible in best_ref
8468 JOIN_TAB **pos= best_ref + const_tables + 1;
8469 for (JOIN_TAB *next= best_ref[const_tables]; next != tab; pos++)
8470 {
8471 JOIN_TAB *const tmp= pos[0];
8472 pos[0]= next;
8473 next= tmp;
8474 }
8475 best_ref[const_tables]= tab;
8476
8477 tab->set_type(key ? JT_CONST : JT_SYSTEM);
8478
8479 const_table_map|= tab->table_ref->map();
8480
8481 const_tables++;
8482 }
8483
8484
make_outerjoin_info()8485 void JOIN::make_outerjoin_info()
8486 {
8487 DBUG_ENTER("JOIN::make_outerjoin_info");
8488
8489 assert(select_lex->outer_join);
8490 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
8491
8492 select_lex->reset_nj_counters();
8493
8494 for (uint i= const_tables; i < tables; ++i)
8495 {
8496 JOIN_TAB *const tab= best_ref[i];
8497 TABLE *const table= tab->table();
8498 if (!table)
8499 continue;
8500
8501 TABLE_LIST *const tbl= tab->table_ref;
8502
8503 if (tbl->outer_join)
8504 {
8505 /*
8506 Table tab is the only one inner table for outer join.
8507 (Like table t4 for the table reference t3 LEFT JOIN t4 ON t3.a=t4.a
8508 is in the query above.)
8509 */
8510 tab->set_last_inner(i);
8511 tab->set_first_inner(i);
8512 tab->init_join_cond_ref(tbl);
8513 tab->cond_equal= tbl->cond_equal;
8514 /*
8515 If this outer join nest is embedded in another join nest,
8516 link the join-tabs:
8517 */
8518 TABLE_LIST *const outer_join_nest= tbl->outer_join_nest();
8519 if (outer_join_nest)
8520 tab->set_first_upper(outer_join_nest->nested_join->first_nested);
8521 }
8522 for (TABLE_LIST *embedding= tbl->embedding;
8523 embedding;
8524 embedding= embedding->embedding)
8525 {
8526 // Ignore join nests that are not outer join nests:
8527 if (!embedding->join_cond_optim())
8528 continue;
8529 NESTED_JOIN *const nested_join= embedding->nested_join;
8530 if (!nested_join->nj_counter)
8531 {
8532 /*
8533 Table tab is the first inner table for nested_join.
8534 Save reference to it in the nested join structure.
8535 */
8536 nested_join->first_nested= i;
8537 tab->init_join_cond_ref(embedding);
8538 tab->cond_equal= tbl->cond_equal;
8539
8540 TABLE_LIST *const outer_join_nest= embedding->outer_join_nest();
8541 if (outer_join_nest)
8542 tab->set_first_upper(outer_join_nest->nested_join->first_nested);
8543 }
8544 if (tab->first_inner() == NO_PLAN_IDX)
8545 tab->set_first_inner(nested_join->first_nested);
8546 if (++nested_join->nj_counter < nested_join->nj_total)
8547 break;
8548 // Table tab is the last inner table for nested join.
8549 best_ref[nested_join->first_nested]->set_last_inner(i);
8550 }
8551 }
8552 DBUG_VOID_RETURN;
8553 }
8554
8555 /**
8556 Build a condition guarded by match variables for embedded outer joins.
8557 When generating a condition for a table as part of an outer join condition
8558 or the WHERE condition, the table in question may also be part of an
8559 embedded outer join. In such cases, the condition must be guarded by
8560 the match variable for this embedded outer join. Such embedded outer joins
8561 may also be recursively embedded in other joins.
8562
8563 The function recursively adds guards for a condition ascending from tab
8564 to root_tab, which is the first inner table of an outer join,
8565 or NULL if the condition being handled is the WHERE clause.
8566
8567 @param idx index of the first inner table for the inner-most outer join
8568 @param cond the predicate to be guarded (must be set)
8569 @param root_idx index of the inner table to stop at
8570 (is NO_PLAN_IDX if this is the WHERE clause)
8571
8572 @return
8573 - pointer to the guarded predicate, if success
8574 - NULL if error
8575 */
8576
8577 static Item*
add_found_match_trig_cond(JOIN * join,plan_idx idx,Item * cond,plan_idx root_idx)8578 add_found_match_trig_cond(JOIN *join, plan_idx idx, Item *cond,
8579 plan_idx root_idx)
8580 {
8581 ASSERT_BEST_REF_IN_JOIN_ORDER(join);
8582 assert(cond);
8583
8584 for ( ; idx != root_idx; idx= join->best_ref[idx]->first_upper())
8585 {
8586 if (!(cond= new Item_func_trig_cond(cond, NULL, join, idx,
8587 Item_func_trig_cond::FOUND_MATCH)))
8588 return NULL;
8589
8590 cond->quick_fix_field();
8591 cond->update_used_tables();
8592 }
8593
8594 return cond;
8595 }
8596
8597
8598 /**
8599 Attach outer join conditions to generated table conditions in an optimal way.
8600
8601 @param last_tab - Last table that has been added to the current plan.
8602 Pre-condition: If this is the last inner table of an outer
8603 join operation, a join condition is attached to the first
8604 inner table of that outer join operation.
8605
8606 @return false if success, true if error.
8607
8608 Outer join conditions are attached to individual tables, but we can analyze
8609 those conditions only when reaching the last inner table of an outer join
8610 operation. Notice also that a table can be last within several outer join
8611 nests, hence the outer for() loop of this function.
8612
8613 Example:
8614 SELECT * FROM t1 LEFT JOIN (t2 LEFT JOIN t3 ON t2.a=t3.a) ON t1.a=t2.a
8615
8616 Table t3 is last both in the join nest (t2 - t3) and in (t1 - (t2 - t3))
8617 Thus, join conditions for both join nests will be evaluated when reaching
8618 this table.
8619
8620 For each outer join operation processed, the join condition is split
8621 optimally over the inner tables of the outer join. The split-out conditions
8622 are later referred to as table conditions (but note that several table
8623 conditions stemming from different join operations may be combined into
8624 a composite table condition).
8625
8626 Example:
8627 Consider the above query once more.
8628 The predicate t1.a=t2.a can be evaluated when rows from t1 and t2 are ready,
8629 ie at table t2. The predicate t2.a=t3.a can be evaluated at table t3.
8630
8631 Each non-constant split-out table condition is guarded by a match variable
8632 that enables it only when a matching row is found for all the embedded
8633 outer join operations.
8634
8635 Each split-out table condition is guarded by a variable that turns the
8636 condition off just before a null-complemented row for the outer join
8637 operation is formed. Thus, the join condition will not be checked for
8638 the null-complemented row.
8639 */
8640
attach_join_conditions(plan_idx last_tab)8641 bool JOIN::attach_join_conditions(plan_idx last_tab)
8642 {
8643 DBUG_ENTER("JOIN::attach_join_conditions");
8644 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
8645
8646 for (plan_idx first_inner= best_ref[last_tab]->first_inner();
8647 first_inner != NO_PLAN_IDX &&
8648 best_ref[first_inner]->last_inner() == last_tab;
8649 first_inner= best_ref[first_inner]->first_upper())
8650 {
8651 /*
8652 Table last_tab is the last inner table of an outer join, locate
8653 the corresponding join condition from the first inner table of the
8654 same outer join:
8655 */
8656 Item *const join_cond= best_ref[first_inner]->join_cond();
8657 assert(join_cond);
8658 /*
8659 Add the constant part of the join condition to the first inner table
8660 of the outer join.
8661 */
8662 Item *cond= make_cond_for_table(join_cond, const_table_map,
8663 (table_map) 0, false);
8664 if (cond)
8665 {
8666 cond= new Item_func_trig_cond(cond, NULL, this, first_inner,
8667 Item_func_trig_cond::IS_NOT_NULL_COMPL);
8668 if (!cond)
8669 DBUG_RETURN(true);
8670 if (cond->fix_fields(thd, NULL))
8671 DBUG_RETURN(true);
8672
8673 if (best_ref[first_inner]->and_with_condition(cond))
8674 DBUG_RETURN(true);
8675 }
8676 /*
8677 Split the non-constant part of the join condition into parts that
8678 can be attached to the inner tables of the outer join.
8679 */
8680 for (plan_idx i= first_inner; i <= last_tab; ++i)
8681 {
8682 table_map prefix_tables= best_ref[i]->prefix_tables();
8683 table_map added_tables= best_ref[i]->added_tables();
8684
8685 /*
8686 When handling the first inner table of an outer join, we may also
8687 reference all tables ahead of this table:
8688 */
8689 if (i == first_inner)
8690 added_tables= prefix_tables;
8691 /*
8692 We need RAND_TABLE_BIT on the last inner table, in case there is a
8693 non-deterministic function in the join condition.
8694 (RAND_TABLE_BIT is set for the last table of the join plan,
8695 but this is not sufficient for join conditions, which may have a
8696 last inner table that is ahead of the last table of the join plan).
8697 */
8698 if (i == last_tab)
8699 {
8700 prefix_tables|= RAND_TABLE_BIT;
8701 added_tables|= RAND_TABLE_BIT;
8702 }
8703 cond= make_cond_for_table(join_cond, prefix_tables, added_tables, false);
8704 if (cond == NULL)
8705 continue;
8706 /*
8707 If the table is part of an outer join that is embedded in the
8708 outer join currently being processed, wrap the condition in
8709 triggered conditions for match variables of such embedded outer joins.
8710 */
8711 if (!(cond= add_found_match_trig_cond(this, best_ref[i]->first_inner(),
8712 cond, first_inner)))
8713 DBUG_RETURN(true);
8714
8715 // Add the guard turning the predicate off for the null-complemented row.
8716 cond= new Item_func_trig_cond(cond, NULL, this, first_inner,
8717 Item_func_trig_cond::IS_NOT_NULL_COMPL);
8718 if (!cond)
8719 DBUG_RETURN(true);
8720 if (cond->fix_fields(thd, NULL))
8721 DBUG_RETURN(true);
8722
8723 // Add the generated condition to the existing table condition
8724 if (best_ref[i]->and_with_condition(cond))
8725 DBUG_RETURN(true);
8726 }
8727 }
8728
8729 DBUG_RETURN(false);
8730 }
8731
8732
8733 /*****************************************************************************
8734 Remove calculation with tables that aren't yet read. Remove also tests
  against fields that are read through key where the table is not an
  outer join table.
8737 We can't remove tests that are made against columns which are stored
8738 in sorted order.
8739 *****************************************************************************/
8740
8741 static Item *
part_of_refkey(TABLE * table,TABLE_REF * ref,Field * field)8742 part_of_refkey(TABLE *table, TABLE_REF *ref, Field *field)
8743 {
8744 uint ref_parts= ref->key_parts;
8745 if (ref_parts)
8746 {
8747 if (ref->has_guarded_conds())
8748 return NULL;
8749
8750 const KEY_PART_INFO *key_part= table->key_info[ref->key].key_part;
8751
8752 for (uint part=0 ; part < ref_parts ; part++,key_part++)
8753 if (field->eq(key_part->field) &&
8754 !(key_part->key_part_flag & HA_PART_KEY_SEG))
8755 return ref->items[part];
8756 }
8757 return NULL;
8758 }
8759
8760
8761 /**
8762 @return
8763 1 if right_item is used removable reference key on left_item
8764
8765 @note see comments in make_cond_for_table_from_pred() about careful
8766 usage/modifications of test_if_ref().
8767 */
8768
test_if_ref(Item * root_cond,Item_field * left_item,Item * right_item)8769 static bool test_if_ref(Item *root_cond,
8770 Item_field *left_item,Item *right_item)
8771 {
8772 if (left_item->depended_from)
8773 return false; // don't even read join_tab of inner subquery!
8774 Field *field=left_item->field;
8775 JOIN_TAB *join_tab= field->table->reginfo.join_tab;
8776 if (join_tab)
8777 ASSERT_BEST_REF_IN_JOIN_ORDER(join_tab->join());
8778 // No need to change const test
8779 if (!field->table->const_table && join_tab &&
8780 (join_tab->first_inner() == NO_PLAN_IDX ||
8781 join_tab->join()->best_ref[join_tab->first_inner()]->join_cond() == root_cond) &&
8782 /* "ref_or_null" implements "x=y or x is null", not "x=y" */
8783 (join_tab->type() != JT_REF_OR_NULL))
8784 {
8785 Item *ref_item= part_of_refkey(field->table, &join_tab->ref(), field);
8786 if (ref_item && ref_item->eq(right_item,1))
8787 {
8788 right_item= right_item->real_item();
8789 if (right_item->type() == Item::FIELD_ITEM)
8790 return (field->eq_def(((Item_field *) right_item)->field));
8791 /* remove equalities injected by IN->EXISTS transformation */
8792 else if (right_item->type() == Item::CACHE_ITEM)
8793 return ((Item_cache *)right_item)->eq_def (field);
8794 if (right_item->const_item() && !(right_item->is_null()))
8795 {
8796 /*
8797 We can remove all fields except:
8798 1. String data types:
8799 - For BINARY/VARBINARY fields with equality against a
8800 string: Ref access can return more rows than match the
8801 string. The reason seems to be that the string constant
8802 is not "padded" to the full length of the field when
8803 setting up ref access. @todo Change how ref access for
8804 BINARY/VARBINARY fields are done so that only qualifying
8805 rows are returned from the storage engine.
8806 2. Float data type: Comparison of float can differ
8807 - When we search "WHERE field=value" using an index,
8808 the "value" side is converted from double to float by
8809 Field_float::store(), then two floats are compared.
8810 - When we search "WHERE field=value" without indexes,
8811 the "field" side is converted from float to double by
8812 Field_float::val_real(), then two doubles are compared.
8813 Note about string data types: All currently existing
8814 collations have "PAD SPACE" style. If we introduce "NO PAD"
8815 collations this function must return false for such
8816 collations, because trailing space compression for indexes
8817 makes the table value and the index value not equal to each
8818 other in "NO PAD" collations. As index lookup strips
8819 trailing spaces, it can return false candidates. Further
8820 comparison of the actual table values is required.
8821 */
8822 if (!((field->type() == MYSQL_TYPE_STRING || // 1
8823 field->type() == MYSQL_TYPE_VARCHAR) && field->binary()) &&
8824 !(field->type() == MYSQL_TYPE_FLOAT && field->decimals() > 0)) // 2
8825 {
8826 return !right_item->save_in_field_no_warnings(field, true);
8827 }
8828 }
8829 }
8830 }
8831 return 0; // keep test
8832 }
8833
8834
8835 /*
8836 Remove the predicates pushed down into the subquery
8837
8838 DESCRIPTION
8839 Given that this join will be executed using (unique|index)_subquery,
8840 without "checking NULL", remove the predicates that were pushed down
8841 into the subquery.
8842
8843 If the subquery compares scalar values, we can remove the condition that
8844 was wrapped into trig_cond (it will be checked when needed by the subquery
8845 engine)
8846
8847 If the subquery compares row values, we need to keep the wrapped
8848 equalities in the WHERE clause: when the left (outer) tuple has both NULL
8849 and non-NULL values, we'll do a full table scan and will rely on the
8850 equalities corresponding to non-NULL parts of left tuple to filter out
8851 non-matching records.
8852
8853 If '*where' is a triggered condition, or contains 'OR x IS NULL', or
8854 contains a condition coming from the original subquery's WHERE clause, or
8855 if there are more than one outer expressions, then WHERE is not of the
8856 simple form:
8857 outer_expr = inner_expr
8858 and thus this function does nothing.
8859
8860 If the index is on prefix (=> test_if_ref() is false), then the equality
8861 is needed as post-filter, so this function does nothing.
8862
8863 TODO: We can remove the equalities that will be guaranteed to be true by the
8864 fact that subquery engine will be using index lookup. This must be done only
8865 for cases where there are no conversion errors of significance, e.g. 257
8866 that is searched in a byte. But this requires homogenization of the return
8867 codes of all Field*::store() methods.
8868 */
remove_subq_pushed_predicates()8869 void JOIN::remove_subq_pushed_predicates()
8870 {
8871 if (where_cond->type() != Item::FUNC_ITEM)
8872 return;
8873 Item_func *const func= static_cast<Item_func *>(where_cond);
8874 if (func->functype() == Item_func::EQ_FUNC &&
8875 func->arguments()[0]->type() == Item::REF_ITEM &&
8876 func->arguments()[1]->type() == Item::FIELD_ITEM &&
8877 test_if_ref(func,
8878 static_cast<Item_field *>(func->arguments()[1]),
8879 func->arguments()[0]))
8880 {
8881 where_cond= NULL;
8882 return;
8883 }
8884 }
8885
8886
8887 /**
8888 @brief
8889 Add keys to derived tables'/views' result tables in a list
8890
8891 @param select_lex generate derived keys for select_lex's derived tables
8892
8893 @details
8894 This function generates keys for all derived tables/views of the select_lex
8895 to which this join corresponds to with help of the TABLE_LIST:generate_keys
8896 function.
8897
8898 @return FALSE all keys were successfully added.
8899 @return TRUE OOM error
8900 */
8901
generate_derived_keys()8902 bool JOIN::generate_derived_keys()
8903 {
8904 assert(select_lex->materialized_derived_table_count);
8905
8906 for (TABLE_LIST *table= select_lex->leaf_tables;
8907 table;
8908 table= table->next_leaf)
8909 {
8910 table->derived_keys_ready= TRUE;
8911 /* Process tables that aren't materialized yet. */
8912 if (table->uses_materialization() && !table->table->is_created() &&
8913 table->generate_keys())
8914 return TRUE;
8915 }
8916 return FALSE;
8917 }
8918
8919
8920 /**
8921 @brief
8922 Drop unused keys for each materialized derived table/view
8923
8924 @details
8925 For each materialized derived table/view, call TABLE::use_index to save one
8926 index chosen by the optimizer and ignore others. If no key is chosen, then all
8927 keys will be ignored.
8928 */
8929
drop_unused_derived_keys()8930 void JOIN::drop_unused_derived_keys()
8931 {
8932 assert(select_lex->materialized_derived_table_count);
8933 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
8934
8935 for (uint i= 0 ; i < tables ; i++)
8936 {
8937 JOIN_TAB *tab= best_ref[i];
8938 TABLE *table= tab->table();
8939 /*
8940 Save chosen key description if:
8941 1) it's a materialized derived table
8942 2) it's not yet instantiated
8943 3) some keys are defined for it
8944 */
8945 if (table &&
8946 tab->table_ref->uses_materialization() && // (1)
8947 !table->is_created() && // (2)
8948 table->max_keys > 0) // (3)
8949 {
8950 Key_use *keyuse= tab->position()->key;
8951
8952 table->use_index(keyuse ? keyuse->key : -1);
8953
8954 const bool key_is_const= keyuse && tab->const_keys.is_set(keyuse->key);
8955 tab->const_keys.clear_all();
8956 tab->keys().clear_all();
8957
8958 if (!keyuse)
8959 continue;
8960
8961 /*
8962 Update the selected "keyuse" to point to key number 0.
8963 Notice that unused keyuse entries still point to the deleted
8964 candidate keys. tab->keys (and tab->const_keys if the chosen key
8965 is constant) should reference key object no. 0 as well.
8966 */
8967 tab->keys().set_bit(0);
8968 if (key_is_const)
8969 tab->const_keys.set_bit(0);
8970
8971 const uint oldkey= keyuse->key;
8972 for (; keyuse->table_ref == tab->table_ref && keyuse->key == oldkey;
8973 keyuse++)
8974 keyuse->key= 0;
8975 }
8976 }
8977 }
8978
8979
8980 /**
8981 Cache constant expressions in WHERE, HAVING, ON conditions.
8982
8983 @return False if success, True if error
8984
8985 @note This function is run after conditions have been pushed down to
8986 individual tables, so transformation is applied to JOIN_TAB::condition
8987 and not to the WHERE condition.
8988 */
8989
cache_const_exprs()8990 bool JOIN::cache_const_exprs()
8991 {
8992 /* No need in cache if all tables are constant. */
8993 assert(!plan_is_const());
8994 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
8995
8996 for (uint i= const_tables; i < tables; i++)
8997 {
8998 Item *condition= best_ref[i]->condition();
8999 if (condition == NULL)
9000 continue;
9001 Item *cache_item= NULL;
9002 Item **analyzer_arg= &cache_item;
9003 condition=
9004 condition->compile(&Item::cache_const_expr_analyzer,
9005 (uchar **)&analyzer_arg,
9006 &Item::cache_const_expr_transformer,
9007 (uchar *)&cache_item);
9008 if (condition == NULL)
9009 return true;
9010 best_ref[i]->set_condition(condition);
9011 }
9012 if (having_cond)
9013 {
9014 Item *cache_item= NULL;
9015 Item **analyzer_arg= &cache_item;
9016 having_cond= having_cond->compile(&Item::cache_const_expr_analyzer,
9017 (uchar **)&analyzer_arg,
9018 &Item::cache_const_expr_transformer,
9019 (uchar *)&cache_item);
9020 if (having_cond == NULL)
9021 return true;
9022 }
9023 return false;
9024 }
9025
9026
9027 /**
9028 Extract a condition that can be checked after reading given table
9029
9030 @param cond Condition to analyze
9031 @param tables Tables for which "current field values" are available
9032 @param used_table Table(s) that we are extracting the condition for (may
9033 also include PSEUDO_TABLE_BITS, and may be zero)
9034 @param exclude_expensive_cond Do not push expensive conditions
9035
9036 @retval <>NULL Generated condition
9037 @retval = NULL Already checked, OR error
9038
9039 @details
9040 Extract the condition that can be checked after reading the table(s)
9041 specified in @c used_table, given that current-field values for tables
9042 specified in @c tables bitmap are available.
9043 If @c used_table is 0, extract conditions for all tables in @c tables.
9044
9045 This function can be used to extract conditions relevant for a table
9046 in a join order. Together with its caller, it will ensure that all
9047 conditions are attached to the first table in the join order where all
9048 necessary fields are available, and it will also ensure that a given
9049 condition is attached to only one table.
9050 To accomplish this, first initialize @c tables to the empty
9051 set. Then, loop over all tables in the join order, set @c used_table to
9052 the bit representing the current table, accumulate @c used_table into the
9053 @c tables set, and call this function. To ensure correct handling of
9054 const expressions and outer references, add the const table map and
9055 OUTER_REF_TABLE_BIT to @c used_table for the first table. To ensure
9056 that random expressions are evaluated for the final table, add
9057 RAND_TABLE_BIT to @c used_table for the final table.
9058
9059 The function assumes that constant, inexpensive parts of the condition
9060 have already been checked. Constant, expensive parts will be attached
9061 to the first table in the join order, provided that the above call
9062 sequence is followed.
9063
9064 The call order will ensure that conditions covering tables in @c tables
9065 minus those in @c used_table, have already been checked.
9066
9067 The function takes into account that some parts of the condition are
9068 guaranteed to be true by employed 'ref' access methods (the code that
9069 does this is located at the end, search down for "EQ_FUNC").
9070
9071 @note
9072 make_cond_for_info_schema() uses an algorithm similar to
9073 make_cond_for_table().
9074 */
9075
9076 Item *
make_cond_for_table(Item * cond,table_map tables,table_map used_table,bool exclude_expensive_cond)9077 make_cond_for_table(Item *cond, table_map tables, table_map used_table,
9078 bool exclude_expensive_cond)
9079 {
9080 return make_cond_for_table_from_pred(cond, cond, tables, used_table,
9081 exclude_expensive_cond);
9082 }
9083
9084 static Item *
make_cond_for_table_from_pred(Item * root_cond,Item * cond,table_map tables,table_map used_table,bool exclude_expensive_cond)9085 make_cond_for_table_from_pred(Item *root_cond, Item *cond,
9086 table_map tables, table_map used_table,
9087 bool exclude_expensive_cond)
9088 {
9089 /*
9090 Ignore this condition if
9091 1. We are extracting conditions for a specific table, and
9092 2. that table is not referenced by the condition, but not if
9093 3. this is a constant condition not checked at optimization time and
9094 this is the first table we are extracting conditions for.
9095 (Assuming that used_table == tables for the first table.)
9096 */
9097 if (used_table && // 1
9098 !(cond->used_tables() & used_table) && // 2
9099 !(cond->is_expensive() && used_table == tables)) // 3
9100 return NULL;
9101
9102 if (cond->type() == Item::COND_ITEM)
9103 {
9104 if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
9105 {
9106 /* Create new top level AND item */
9107 Item_cond_and *new_cond= new Item_cond_and;
9108 if (!new_cond)
9109 return NULL;
9110 List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
9111 Item *item;
9112 while ((item= li++))
9113 {
9114 Item *fix= make_cond_for_table_from_pred(root_cond, item,
9115 tables, used_table,
9116 exclude_expensive_cond);
9117 if (fix)
9118 new_cond->argument_list()->push_back(fix);
9119 }
9120 switch (new_cond->argument_list()->elements) {
9121 case 0:
9122 return NULL; // Always true
9123 case 1:
9124 return new_cond->argument_list()->head();
9125 default:
9126 if (new_cond->fix_fields(current_thd, NULL))
9127 return NULL;
9128 return new_cond;
9129 }
9130 }
9131 else
9132 { // Or list
9133 Item_cond_or *new_cond= new Item_cond_or;
9134 if (!new_cond)
9135 return NULL;
9136 List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
9137 Item *item;
9138 while ((item= li++))
9139 {
9140 Item *fix= make_cond_for_table_from_pred(root_cond, item,
9141 tables, 0L,
9142 exclude_expensive_cond);
9143 if (!fix)
9144 return NULL; // Always true
9145 new_cond->argument_list()->push_back(fix);
9146 }
9147 if (new_cond->fix_fields(current_thd, NULL))
9148 return NULL;
9149 return new_cond;
9150 }
9151 }
9152
9153 /*
9154 Omit this condition if
9155 1. It has been marked as omittable before, or
9156 2. Some tables referred by the condition are not available, or
9157 3. We are extracting conditions for all tables, the condition is
9158 considered 'expensive', and we want to delay evaluation of such
9159 conditions to the execution phase.
9160 */
9161 if (cond->marker == 3 || // 1
9162 (cond->used_tables() & ~tables) || // 2
9163 (!used_table && exclude_expensive_cond && cond->is_expensive())) // 3
9164 return NULL;
9165
9166 /*
9167 Extract this condition if
9168 1. It has already been marked as applicable, or
9169 2. It is not a <comparison predicate> (=, <, >, <=, >=, <=>)
9170 */
9171 if (cond->marker == 2 || // 1
9172 cond->eq_cmp_result() == Item::COND_OK) // 2
9173 return cond;
9174
9175 /*
9176 Remove equalities that are guaranteed to be true by use of 'ref' access
9177 method.
9178 Note that ref access implements "table1.field1 <=> table2.indexed_field2",
9179 i.e. if it passed a NULL field1, it will return NULL indexed_field2 if
9180 there are.
9181 Thus the equality "table1.field1 = table2.indexed_field2",
9182 is equivalent to "ref access AND table1.field1 IS NOT NULL"
9183 i.e. "ref access and proper setting/testing of ref->null_rejecting".
9184 Thus, we must be careful, that when we remove equalities below we also
9185 set ref->null_rejecting, and test it at execution; otherwise wrong NULL
9186 matches appear.
9187 So:
9188 - for the optimization phase, the code which is below, and the code in
9189 test_if_ref(), and in add_key_field(), must be kept in sync: if the
9190 applicability conditions in one place are relaxed, they should also be
9191 relaxed elsewhere.
9192 - for the execution phase, all possible execution methods must test
9193 ref->null_rejecting.
9194 */
9195 if (cond->type() == Item::FUNC_ITEM &&
9196 ((Item_func*) cond)->functype() == Item_func::EQ_FUNC)
9197 {
9198 Item *left_item= ((Item_func*) cond)->arguments()[0]->real_item();
9199 Item *right_item= ((Item_func*) cond)->arguments()[1]->real_item();
9200 if ((left_item->type() == Item::FIELD_ITEM &&
9201 test_if_ref(root_cond, (Item_field*) left_item, right_item)) ||
9202 (right_item->type() == Item::FIELD_ITEM &&
9203 test_if_ref(root_cond, (Item_field*) right_item, left_item)))
9204 {
9205 cond->marker= 3; // Condition can be omitted
9206 return NULL;
9207 }
9208 }
9209 cond->marker= 2; // Mark condition as applicable
9210 return cond;
9211 }
9212
9213
9214 /**
9215 Separates the predicates in a join condition and pushes them to the
9216 join step where all involved tables are available in the join prefix.
9217 ON clauses from JOIN expressions are also pushed to the most appropriate step.
9218
9219 @param join Join object where predicates are pushed.
9220
9221 @param cond Pointer to condition which may contain an arbitrary number of
9222 predicates, combined using AND, OR and XOR items.
9223 If NULL, equivalent to a predicate that returns TRUE for all
9224 row combinations.
9225
9226
9227 @retval true Found impossible WHERE clause, or out-of-memory
9228 @retval false Other
9229 */
9230
make_join_select(JOIN * join,Item * cond)9231 static bool make_join_select(JOIN *join, Item *cond)
9232 {
9233 THD *thd= join->thd;
9234 Opt_trace_context * const trace= &thd->opt_trace;
9235 DBUG_ENTER("make_join_select");
9236 ASSERT_BEST_REF_IN_JOIN_ORDER(join);
9237
9238 // Add IS NOT NULL conditions to table conditions:
9239 add_not_null_conds(join);
9240
9241 /*
9242 Extract constant conditions that are part of the WHERE clause.
9243 Constant parts of join conditions from outer joins are attached to
9244 the appropriate table condition in JOIN::attach_join_conditions().
9245 */
9246 if (cond) /* Because of QUICK_GROUP_MIN_MAX_SELECT */
9247 { /* there may be a select without a cond. */
9248 if (join->primary_tables > 1)
9249 cond->update_used_tables(); // Table number may have changed
9250 if (join->plan_is_const() &&
9251 join->select_lex->master_unit() ==
9252 thd->lex->unit) // The outer-most query block
9253 join->const_table_map|= RAND_TABLE_BIT;
9254 }
9255 /*
9256 Extract conditions that depend on constant tables.
9257 The const part of the query's WHERE clause can be checked immediately
9258 and if it is not satisfied then the join has empty result
9259 */
9260 Item *const_cond= NULL;
9261 if (cond)
9262 const_cond= make_cond_for_table(cond, join->const_table_map,
9263 (table_map) 0, true);
9264
9265 // Add conditions added by add_not_null_conds()
9266 for (uint i= 0; i < join->const_tables; i++)
9267 {
9268 if (and_conditions(&const_cond, join->best_ref[i]->condition()))
9269 DBUG_RETURN(true);
9270 }
9271 DBUG_EXECUTE("where", print_where(const_cond, "constants", QT_ORDINARY););
9272 if (const_cond != NULL)
9273 {
9274 const bool const_cond_result= const_cond->val_int() != 0;
9275 if (thd->is_error())
9276 DBUG_RETURN(true);
9277
9278 Opt_trace_object trace_const_cond(trace);
9279 trace_const_cond.add("condition_on_constant_tables", const_cond)
9280 .add("condition_value", const_cond_result);
9281 if (!const_cond_result)
9282 {
9283 DBUG_PRINT("info",("Found impossible WHERE condition"));
9284 DBUG_RETURN(true);
9285 }
9286 }
9287
9288 /*
9289 Extract remaining conditions from WHERE clause and join conditions,
9290 and attach them to the most appropriate table condition. This means that
9291 a condition will be evaluated as soon as all fields it depends on are
9292 available. For outer join conditions, the additional criterion is that
9293 we must have determined whether outer-joined rows are available, or
9294 have been NULL-extended, see JOIN::attach_join_conditions() for details.
9295 */
9296 {
9297 Opt_trace_object trace_wrapper(trace);
9298 Opt_trace_object
9299 trace_conditions(trace, "attaching_conditions_to_tables");
9300 trace_conditions.add("original_condition", cond);
9301 Opt_trace_array
9302 trace_attached_comp(trace, "attached_conditions_computation");
9303
9304 for (uint i=join->const_tables ; i < join->tables ; i++)
9305 {
9306 JOIN_TAB *const tab= join->best_ref[i];
9307
9308 if (!tab->position())
9309 continue;
9310 /*
9311 first_inner is the X in queries like:
9312 SELECT * FROM t1 LEFT OUTER JOIN (t2 JOIN t3) ON X
9313 */
9314 const plan_idx first_inner= tab->first_inner();
9315 const table_map used_tables= tab->prefix_tables();
9316 const table_map current_map= tab->added_tables();
9317 Item *tmp= NULL;
9318
9319 if (cond)
9320 tmp= make_cond_for_table(cond,used_tables,current_map, 0);
9321 /* Add conditions added by add_not_null_conds(). */
9322 if (tab->condition() && and_conditions(&tmp, tab->condition()))
9323 DBUG_RETURN(true);
9324
9325
9326 if (cond && !tmp && tab->quick())
9327 { // Outer join
9328 assert(tab->type() == JT_RANGE || tab->type() == JT_INDEX_MERGE);
9329 /*
9330 Hack to handle the case where we only refer to a table
9331 in the ON part of an OUTER JOIN. In this case we want the code
9332 below to check if we should use 'quick' instead.
9333 */
9334 DBUG_PRINT("info", ("Item_int"));
9335 tmp= new Item_int((longlong) 1,1); // Always true
9336 }
9337 if (tmp || !cond || tab->type() == JT_REF || tab->type() == JT_REF_OR_NULL ||
9338 tab->type() == JT_EQ_REF || first_inner != NO_PLAN_IDX)
9339 {
9340 DBUG_EXECUTE("where",print_where(tmp,tab->table()->alias, QT_ORDINARY););
9341 /*
9342 If tab is an inner table of an outer join operation,
9343 add a match guard to the pushed down predicate.
9344 The guard will turn the predicate on only after
9345 the first match for outer tables is encountered.
9346 */
9347 if (cond && tmp)
9348 {
9349 /*
9350 Because of QUICK_GROUP_MIN_MAX_SELECT there may be a select without
9351 a cond, so neutralize the hack above.
9352 */
9353 if (!(tmp= add_found_match_trig_cond(join, first_inner, tmp, NO_PLAN_IDX)))
9354 DBUG_RETURN(true);
9355 tab->set_condition(tmp);
9356 /* Push condition to storage engine if this is enabled
9357 and the condition is not guarded */
9358 if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN) &&
9359 first_inner == NO_PLAN_IDX)
9360 {
9361 Item *push_cond=
9362 make_cond_for_table(tmp, tab->table_ref->map(),
9363 tab->table_ref->map(), 0);
9364 if (push_cond)
9365 {
9366 /* Push condition to handler */
9367 if (!tab->table()->file->cond_push(push_cond))
9368 tab->table()->file->pushed_cond= push_cond;
9369 }
9370 }
9371 }
9372 else
9373 {
9374 tab->set_condition(NULL);
9375 }
9376
9377 DBUG_EXECUTE("where",print_where(tmp,tab->table()->alias, QT_ORDINARY););
9378
9379 if (tab->quick())
9380 {
9381 if (tab->needed_reg.is_clear_all() && tab->type() != JT_CONST)
9382 {
9383 /*
9384 We keep (for now) the QUICK AM calculated in
9385 get_quick_record_count().
9386 */
9387 assert(tab->quick()->is_valid());
9388 }
9389 else
9390 {
9391 delete tab->quick();
9392 tab->set_quick(NULL);
9393 }
9394 }
9395
9396 if ((tab->type() == JT_ALL || tab->type() == JT_RANGE ||
9397 tab->type() == JT_INDEX_MERGE || tab->type() == JT_INDEX_SCAN) &&
9398 tab->use_quick != QS_RANGE)
9399 {
9400 /*
9401 We plan to scan (table/index/range scan).
9402 Check again if we should use an index. We can use an index if:
9403
9404 1a) There is a condition that range optimizer can work on, and
9405 1b) There are non-constant conditions on one or more keys, and
9406 1c) Some of the non-constant fields may have been read
9407 already. This may be the case if this is not the first
9408 table in the join OR this is a subselect with
9409 non-constant conditions referring to an outer table
9410 (dependent subquery)
9411 or,
9412 2a) There are conditions only relying on constants
9413 2b) This is the first non-constant table
9414 2c) There is a limit of rows to read that is lower than
9415 the fanout for this table, predicate filters included
9416 (i.e., the estimated number of rows that will be
9417 produced for this table per row combination of
9418 previous tables)
9419 2d) The query is NOT run with FOUND_ROWS() (because in that
9420 case we have to scan through all rows to count them anyway)
9421 */
9422 enum { DONT_RECHECK, NOT_FIRST_TABLE, LOW_LIMIT }
9423 recheck_reason= DONT_RECHECK;
9424
9425 assert(tab->const_keys.is_subset(tab->keys()));
9426
9427 const join_type orig_join_type= tab->type();
9428 const QUICK_SELECT_I *const orig_quick= tab->quick();
9429
9430 if (cond && // 1a
9431 (tab->keys() != tab->const_keys) && // 1b
9432 (i > 0 || // 1c
9433 (join->select_lex->master_unit()->item &&
9434 cond->used_tables() & OUTER_REF_TABLE_BIT)))
9435 recheck_reason= NOT_FIRST_TABLE;
9436 else if (!tab->const_keys.is_clear_all() && // 2a
9437 i == join->const_tables && // 2b
9438 (join->unit->select_limit_cnt <
9439 (tab->position()->rows_fetched *
9440 tab->position()->filter_effect)) && // 2c
9441 !join->calc_found_rows) // 2d
9442 recheck_reason= LOW_LIMIT;
9443
9444 if (tab->position()->sj_strategy == SJ_OPT_LOOSE_SCAN)
9445 {
9446 /*
9447 Semijoin loose scan has settled for a certain index-based access
9448 method with suitable characteristics, don't substitute it.
9449 */
9450 recheck_reason= DONT_RECHECK;
9451 }
9452
9453 if (recheck_reason != DONT_RECHECK)
9454 {
9455 Opt_trace_object trace_one_table(trace);
9456 trace_one_table.add_utf8_table(tab->table_ref);
9457 Opt_trace_object trace_table(trace, "rechecking_index_usage");
9458 if (recheck_reason == NOT_FIRST_TABLE)
9459 trace_table.add_alnum("recheck_reason", "not_first_table");
9460 else
9461 trace_table.add_alnum("recheck_reason", "low_limit").
9462 add("limit", join->unit->select_limit_cnt).
9463 add("row_estimate",
9464 tab->position()->rows_fetched *
9465 tab->position()->filter_effect);
9466
9467 /* Join with outer join condition */
9468 Item *orig_cond= tab->condition();
9469 tab->and_with_condition(tab->join_cond());
9470
9471 /*
9472 We can't call sel->cond->fix_fields,
9473 as it will break tab->join_cond() if it's AND condition
9474 (fix_fields currently removes extra AND/OR levels).
9475 Yet attributes of the just built condition are not needed.
9476 Thus we call sel->cond->quick_fix_field for safety.
9477 */
9478 if (tab->condition() && !tab->condition()->fixed)
9479 tab->condition()->quick_fix_field();
9480
9481 key_map usable_keys= tab->keys();
9482 ORDER::enum_order interesting_order= ORDER::ORDER_NOT_RELEVANT;
9483
9484 if (recheck_reason == LOW_LIMIT)
9485 {
9486 int read_direction= 0;
9487
9488 /*
9489 If the current plan is to use range, then check if the
9490 already selected index provides the order dictated by the
9491 ORDER BY clause.
9492 */
9493 if (tab->quick() && tab->quick()->index != MAX_KEY)
9494 {
9495 const uint ref_key= tab->quick()->index;
9496
9497 read_direction= test_if_order_by_key(join->order,
9498 tab->table(), ref_key);
9499 /*
9500 If the index provides order there is no need to recheck
9501 index usage; we already know from the former call to
9502 test_quick_select() that a range scan on the chosen
9503 index is cheapest. Note that previous calls to
9504 test_quick_select() did not take order direction
9505 (ASC/DESC) into account, so in case of DESC ordering
9506 we still need to recheck.
9507 */
9508 if ((read_direction == 1) ||
9509 (read_direction == -1 && tab->quick()->reverse_sorted()))
9510 {
9511 recheck_reason= DONT_RECHECK;
9512 }
9513 }
9514 /*
9515 We do a cost based search for an ordering index here. Do this
9516 only if prefer_ordering_index switch is on or an index is
9517 forced for order by
9518 */
9519 if (recheck_reason != DONT_RECHECK &&
9520 (tab->table()->force_index_order ||
9521 thd->optimizer_switch_flag(
9522 OPTIMIZER_SWITCH_PREFER_ORDERING_INDEX)))
9523 {
9524 int best_key= -1;
9525 ha_rows select_limit= join->unit->select_limit_cnt;
9526
9527 /* Use index specified in FORCE INDEX FOR ORDER BY, if any. */
9528 if (tab->table()->force_index)
9529 usable_keys.intersect(tab->table()->keys_in_use_for_order_by);
9530
9531 /* Do a cost based search on the indexes that give sort order */
9532 test_if_cheaper_ordering(tab, join->order, tab->table(),
9533 usable_keys, -1, select_limit,
9534 &best_key, &read_direction,
9535 &select_limit);
9536 if (best_key < 0)
9537 recheck_reason= DONT_RECHECK; // No usable keys
9538 else
9539 {
9540 // Only usable_key is the best_key chosen
9541 usable_keys.clear_all();
9542 usable_keys.set_bit(best_key);
9543 interesting_order= (read_direction == -1 ? ORDER::ORDER_DESC :
9544 ORDER::ORDER_ASC);
9545 }
9546 }
9547 }
9548
9549 bool search_if_impossible= recheck_reason != DONT_RECHECK;
9550 if (search_if_impossible)
9551 {
9552 if (tab->quick())
9553 {
9554 delete tab->quick();
9555 tab->set_type(JT_ALL);
9556 }
9557 QUICK_SELECT_I *qck;
9558 search_if_impossible=
9559 test_quick_select(thd, usable_keys,
9560 used_tables & ~tab->table_ref->map(),
9561 join->calc_found_rows ?
9562 HA_POS_ERROR :
9563 join->unit->select_limit_cnt,
9564 false, // don't force quick range
9565 interesting_order, tab,
9566 tab->condition(),
9567 &tab->needed_reg, &qck,
9568 tab->table()->force_index) < 0;
9569 tab->set_quick(qck);
9570 }
9571 tab->set_condition(orig_cond);
9572 if (search_if_impossible)
9573 {
9574 /*
9575 Before reporting "Impossible WHERE" for the whole query
9576 we have to check isn't it only "impossible ON" instead
9577 */
9578 if (!tab->join_cond())
9579 DBUG_RETURN(1); // No ON, so it's really "impossible WHERE"
9580 Opt_trace_object trace_without_on(trace, "without_ON_clause");
9581 if (tab->quick())
9582 {
9583 delete tab->quick();
9584 tab->set_type(JT_ALL);
9585 }
9586 QUICK_SELECT_I *qck;
9587 const bool impossible_where=
9588 test_quick_select(thd, tab->keys(),
9589 used_tables & ~tab->table_ref->map(),
9590 join->calc_found_rows ?
9591 HA_POS_ERROR :
9592 join->unit->select_limit_cnt,
9593 false, //don't force quick range
9594 ORDER::ORDER_NOT_RELEVANT, tab,
9595 tab->condition(), &tab->needed_reg,
9596 &qck, tab->table()->force_index) < 0;
9597 tab->set_quick(qck);
9598 if (impossible_where)
9599 DBUG_RETURN(1); // Impossible WHERE
9600 }
9601
9602 /*
9603 Access method changed. This is after deciding join order
9604 and access method for all other tables so the info
9605 updated below will not have any effect on the execution
9606 plan.
9607 */
9608 if (tab->quick())
9609 tab->set_type(calc_join_type(tab->quick()->get_type()));
9610
9611 } // end of "if (recheck_reason != DONT_RECHECK)"
9612
9613 if (!tab->table()->quick_keys.is_subset(tab->checked_keys) ||
9614 !tab->needed_reg.is_subset(tab->checked_keys))
9615 {
9616 tab->keys().merge(tab->table()->quick_keys);
9617 tab->keys().merge(tab->needed_reg);
9618
9619 /*
9620 The logic below for assigning tab->use_quick is strange.
9621 It bases the decision of which access method to use
9622 (dynamic range, range, scan) based on seemingly
9623 unrelated information like the presense of another index
9624 with too bad selectivity to be used.
9625
9626 Consider the following scenario:
9627
9628 The join optimizer has decided to use join order
9629 (t1,t2), and 'tab' is currently t2. Further, assume that
9630 there is a join condition between t1 and t2 using some
9631 range operator (e.g. "t1.x < t2.y").
9632
9633 It has been decided that a table scan is best for t2.
9634 make_join_select() then reran the range optimizer a few
9635 lines up because there is an index 't2.good_idx'
9636 covering the t2.y column. If 'good_idx' is the only
9637 index in t2, the decision below will be to use dynamic
9638 range. However, if t2 also has another index 't2.other'
9639 which the range access method can be used on but
9640 selectivity is bad (#rows estimate is high), then table
9641 scan is chosen instead.
9642
9643 Thus, the choice of DYNAMIC RANGE vs SCAN depends on the
9644 presense of an index that has so bad selectivity that it
9645 will not be used anyway.
9646 */
9647 if (!tab->needed_reg.is_clear_all() &&
9648 (tab->table()->quick_keys.is_clear_all() ||
9649 (tab->quick() &&
9650 (tab->quick()->records >= 100L))))
9651 {
9652 tab->use_quick= QS_DYNAMIC_RANGE;
9653 tab->set_type(JT_ALL);
9654 }
9655 else
9656 tab->use_quick= QS_RANGE;
9657 }
9658
9659 if (tab->type() != orig_join_type ||
9660 tab->quick() != orig_quick) // Access method changed
9661 tab->position()->filter_effect= COND_FILTER_STALE;
9662
9663 }
9664 }
9665
9666 if (join->attach_join_conditions(i))
9667 DBUG_RETURN(true);
9668 }
9669 trace_attached_comp.end();
9670
9671 /*
9672 In outer joins the loop above, in iteration for table #i, may push
9673 conditions to a table before #i. Thus, the processing below has to be in
9674 a separate loop:
9675 */
9676 Opt_trace_array trace_attached_summary(trace,
9677 "attached_conditions_summary");
9678 for (uint i= join->const_tables ; i < join->tables ; i++)
9679 {
9680 JOIN_TAB * const tab= join->best_ref[i];
9681 if (!tab->table())
9682 continue;
9683 Item * const cond= tab->condition();
9684 Opt_trace_object trace_one_table(trace);
9685 trace_one_table.add_utf8_table(tab->table_ref).
9686 add("attached", cond);
9687 if (cond &&
9688 cond->has_subquery() /* traverse only if needed */ )
9689 {
9690 /*
9691 Why we pass walk_subquery=false: imagine
9692 WHERE t1.col IN (SELECT * FROM t2
9693 WHERE t2.col IN (SELECT * FROM t3)
9694 and tab==t1. The grandchild subquery (SELECT * FROM t3) should not
9695 be marked as "in condition of t1" but as "in condition of t2", for
9696 correct calculation of the number of its executions.
9697 */
9698 std::pair<SELECT_LEX *, int> pair_object(join->select_lex, i);
9699 cond->walk(&Item::inform_item_in_cond_of_tab,
9700 Item::WALK_POSTFIX,
9701 pointer_cast<uchar * const>(&pair_object));
9702 }
9703
9704 }
9705 }
9706 DBUG_RETURN(0);
9707 }
9708
9709
9710 /**
9711 Remove the following expressions from ORDER BY and GROUP BY:
9712 Constant expressions @n
9713 Expression that only uses tables that are of type EQ_REF and the reference
9714 is in the ORDER list or if all refereed tables are of the above type.
9715
9716 In the following, the X field can be removed:
9717 @code
9718 SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t1.a,t2.X
9719 SELECT * FROM t1,t2,t3 WHERE t1.a=t2.a AND t2.b=t3.b ORDER BY t1.a,t3.X
9720 @endcode
9721
9722 These can't be optimized:
9723 @code
9724 SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t2.X,t1.a
9725 SELECT * FROM t1,t2 WHERE t1.a=t2.a AND t1.b=t2.b ORDER BY t1.a,t2.c
9726 SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t2.b,t1.a
9727 @endcode
9728
9729 @param JOIN join object
9730 @param start_order clause being analyzed (ORDER BY, GROUP BY...)
9731 @param tab table
9732 @param cached_eq_ref_tables bitmap: bit Z is set if the table of map Z
9733 was already the subject of an eq_ref_table() call for the same clause; then
9734 the return value of this previous call can be found at bit Z of
9735 'eq_ref_tables'
9736 @param eq_ref_tables see above.
9737 */
9738
9739 static bool
eq_ref_table(JOIN * join,ORDER * start_order,JOIN_TAB * tab,table_map * cached_eq_ref_tables,table_map * eq_ref_tables)9740 eq_ref_table(JOIN *join, ORDER *start_order, JOIN_TAB *tab,
9741 table_map *cached_eq_ref_tables, table_map *eq_ref_tables)
9742 {
9743 /* We can skip const tables only if not an outer table */
9744 if (tab->type() == JT_CONST && tab->first_inner() == NO_PLAN_IDX)
9745 return true;
9746 if (tab->type() != JT_EQ_REF || tab->table()->is_nullable())
9747 return false;
9748
9749 const table_map map= tab->table_ref->map();
9750 uint found= 0;
9751
9752 for (Item **ref_item= tab->ref().items, **end= ref_item + tab->ref().key_parts ;
9753 ref_item != end ; ref_item++)
9754 {
9755 if (! (*ref_item)->const_item())
9756 { // Not a const ref
9757 ORDER *order;
9758 for (order=start_order ; order ; order=order->next)
9759 {
9760 if ((*ref_item)->eq(order->item[0],0))
9761 break;
9762 }
9763 if (order)
9764 {
9765 if (!(order->used & map))
9766 {
9767 found++;
9768 order->used|= map;
9769 }
9770 continue; // Used in ORDER BY
9771 }
9772 if (!only_eq_ref_tables(join, start_order, (*ref_item)->used_tables(),
9773 cached_eq_ref_tables, eq_ref_tables))
9774 return false;
9775 }
9776 }
9777 /* Check that there was no reference to table before sort order */
9778 for (; found && start_order ; start_order=start_order->next)
9779 {
9780 if (start_order->used & map)
9781 {
9782 found--;
9783 continue;
9784 }
9785 if (start_order->depend_map & map)
9786 return false;
9787 }
9788 return true;
9789 }
9790
9791
9792 /// @see eq_ref_table()
9793 static bool
only_eq_ref_tables(JOIN * join,ORDER * order,table_map tables,table_map * cached_eq_ref_tables,table_map * eq_ref_tables)9794 only_eq_ref_tables(JOIN *join, ORDER *order, table_map tables,
9795 table_map *cached_eq_ref_tables, table_map *eq_ref_tables)
9796 {
9797 tables&= ~PSEUDO_TABLE_BITS;
9798 for (JOIN_TAB **tab=join->map2table ; tables ; tab++, tables>>=1)
9799 {
9800 if (tables & 1)
9801 {
9802 const table_map map= (*tab)->table_ref->map();
9803 bool is_eq_ref;
9804 if (*cached_eq_ref_tables & map) // then there exists a cached bit
9805 is_eq_ref= *eq_ref_tables & map;
9806 else
9807 {
9808 is_eq_ref= eq_ref_table(join, order, *tab,
9809 cached_eq_ref_tables, eq_ref_tables);
9810 if (is_eq_ref)
9811 *eq_ref_tables|= map;
9812 else
9813 *eq_ref_tables&= ~map;
9814 *cached_eq_ref_tables|= map; // now there exists a cached bit
9815 }
9816 if (!is_eq_ref)
9817 return false;
9818 }
9819 }
9820 return true;
9821 }
9822
9823
9824 /**
9825 Check if an expression in ORDER BY or GROUP BY is a duplicate of a
9826 preceding expression.
9827
9828 @param first_order the first expression in the ORDER BY or
9829 GROUP BY clause
9830 @param possible_dup the expression that might be a duplicate of
9831 another expression preceding it the ORDER BY
9832 or GROUP BY clause
9833
9834 @returns true if possible_dup is a duplicate, false otherwise
9835 */
duplicate_order(const ORDER * first_order,const ORDER * possible_dup)9836 static bool duplicate_order(const ORDER *first_order,
9837 const ORDER *possible_dup)
9838 {
9839 const ORDER *order;
9840 for (order=first_order; order ; order=order->next)
9841 {
9842 if (order == possible_dup)
9843 {
9844 // all expressions preceding possible_dup have been checked.
9845 return false;
9846 }
9847 else
9848 {
9849 const Item *it1= order->item[0]->real_item();
9850 const Item *it2= possible_dup->item[0]->real_item();
9851
9852 if (it1->eq(it2, 0))
9853 return true;
9854 }
9855 }
9856 return false;
9857 }
9858
9859 /**
9860 Remove all constants and check if ORDER only contains simple
9861 expressions.
9862
9863 simple_order is set to 1 if sort_order only uses fields from head table
9864 and the head table is not a LEFT JOIN table.
9865
9866 @param first_order List of SORT or GROUP order
9867 @param cond WHERE statement
9868 @param change_list Set to 1 if we should remove things from list.
9869 If this is not set, then only simple_order is
9870 calculated.
9871 @param simple_order Set to 1 if we are only using simple expressions
9872 @param clause_type "ORDER BY" etc for printing in optimizer trace
9873
9874 @return
9875 Returns new sort order
9876 */
9877
remove_const(ORDER * first_order,Item * cond,bool change_list,bool * simple_order,const char * clause_type)9878 ORDER *JOIN::remove_const(ORDER *first_order, Item *cond, bool change_list,
9879 bool *simple_order, const char *clause_type)
9880 {
9881 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
9882
9883 if (plan_is_const())
9884 return change_list ? 0 : first_order; // No need to sort
9885
9886 Opt_trace_context * const trace= &thd->opt_trace;
9887 Opt_trace_disable_I_S trace_disabled(trace, first_order == NULL);
9888 Opt_trace_object trace_wrapper(trace);
9889 Opt_trace_object trace_simpl(trace, "clause_processing");
9890 if (trace->is_started())
9891 {
9892 trace_simpl.add_alnum("clause", clause_type);
9893 String str;
9894 st_select_lex::print_order(&str, first_order,
9895 enum_query_type(QT_TO_SYSTEM_CHARSET |
9896 QT_SHOW_SELECT_NUMBER |
9897 QT_NO_DEFAULT_DB));
9898 trace_simpl.add_utf8("original_clause", str.ptr(), str.length());
9899 }
9900 Opt_trace_array trace_each_item(trace, "items");
9901
9902 ORDER *order,**prev_ptr;
9903 JOIN_TAB *const first_tab= best_ref[const_tables];
9904 table_map first_table= first_tab->table_ref->map();
9905 table_map not_const_tables= ~const_table_map;
9906 table_map ref;
9907 // Caches to avoid repeating eq_ref_table() calls, @see eq_ref_table()
9908 table_map eq_ref_tables= 0, cached_eq_ref_tables= 0;
9909 DBUG_ENTER("JOIN::remove_const");
9910
9911 prev_ptr= &first_order;
9912 *simple_order= !first_tab->join_cond();
9913
9914 /* NOTE: A variable of not_const_tables ^ first_table; breaks gcc 2.7 */
9915
9916 update_depend_map(first_order);
9917 for (order=first_order; order ; order=order->next)
9918 {
9919 Opt_trace_object trace_one_item(trace);
9920 trace_one_item.add("item", order->item[0]);
9921 table_map order_tables=order->item[0]->used_tables();
9922 if (order->item[0]->with_sum_func ||
9923 /*
9924 If the outer table of an outer join is const (either by itself or
9925 after applying WHERE condition), grouping on a field from such a
9926 table will be optimized away and filesort without temporary table
9927 will be used unless we prevent that now. Filesort is not fit to
9928 handle joins and the join condition is not applied. We can't detect
9929 the case without an expensive test, however, so we force temporary
9930 table for all queries containing more than one table, ROLLUP, and an
9931 outer join.
9932 */
9933 (primary_tables > 1 &&
9934 rollup.state == ROLLUP::STATE_INITED &&
9935 select_lex->outer_join))
9936 *simple_order= 0; // Must do a temp table to sort
9937 else if (!(order_tables & not_const_tables))
9938 {
9939 if (order->item[0]->has_subquery())
9940 {
9941 if (!thd->lex->is_explain())
9942 {
9943 Opt_trace_array trace_subselect(trace, "subselect_evaluation");
9944 order->item[0]->val_str(&order->item[0]->str_value);
9945 }
9946 order->item[0]->mark_subqueries_optimized_away();
9947 }
9948 trace_one_item.add("uses_only_constant_tables", true);
9949 continue; // skip const item
9950 }
9951 else if (duplicate_order(first_order, order))
9952 {
9953 /*
9954 If 'order' is a duplicate of an expression earlier in the
9955 ORDER/GROUP BY sequence, it can be removed from the ORDER BY
9956 or GROUP BY clause.
9957 */
9958 trace_one_item.add("duplicate_item", true);
9959 continue;
9960 }
9961 else if (order->in_field_list && order->item[0]->has_subquery())
9962 /*
9963 If the order item is a subquery that is also in the field
9964 list, a temp table should be used to avoid evaluating the
9965 subquery for each row both when a) creating a sort index and
9966 b) getting the value.
9967 Example: "SELECT (SELECT ... ) as a ... GROUP BY a;"
9968 */
9969 *simple_order= false;
9970 else
9971 {
9972 if (order_tables & (RAND_TABLE_BIT | OUTER_REF_TABLE_BIT))
9973 *simple_order=0;
9974 else
9975 {
9976 if (cond && const_expression_in_where(cond,order->item[0]))
9977 {
9978 trace_one_item.add("equals_constant_in_where", true);
9979 continue;
9980 }
9981 if ((ref=order_tables & (not_const_tables ^ first_table)))
9982 {
9983 if (!(order_tables & first_table) &&
9984 only_eq_ref_tables(this, first_order, ref,
9985 &cached_eq_ref_tables, &eq_ref_tables))
9986 {
9987 trace_one_item.add("eq_ref_to_preceding_items", true);
9988 continue;
9989 }
9990 *simple_order=0; // Must do a temp table to sort
9991 }
9992 }
9993 }
9994 if (change_list)
9995 *prev_ptr= order; // use this entry
9996 prev_ptr= &order->next;
9997 }
9998 if (change_list)
9999 *prev_ptr=0;
10000 if (prev_ptr == &first_order) // Nothing to sort/group
10001 *simple_order=1;
10002 DBUG_PRINT("exit",("simple_order: %d",(int) *simple_order));
10003
10004 trace_each_item.end();
10005 trace_simpl.add("resulting_clause_is_simple", *simple_order);
10006 if (trace->is_started() && change_list)
10007 {
10008 String str;
10009 st_select_lex::print_order(&str, first_order,
10010 enum_query_type(QT_TO_SYSTEM_CHARSET |
10011 QT_SHOW_SELECT_NUMBER |
10012 QT_NO_DEFAULT_DB));
10013 trace_simpl.add_utf8("resulting_clause", str.ptr(), str.length());
10014 }
10015
10016 DBUG_RETURN(first_order);
10017 }
10018
10019
10020 /**
10021 Optimize conditions by
10022
10023 a) applying transitivity to build multiple equality predicates
10024 (MEP): if x=y and y=z the MEP x=y=z is built.
10025 b) apply constants where possible. If the value of x is known to be
10026 42, x is replaced with a constant of value 42. By transitivity, this
10027 also applies to MEPs, so the MEP in a) will become 42=x=y=z.
10028 c) remove conditions that are always false or always true
10029
10030 @param thd Thread handler
10031 @param[in,out] cond WHERE or HAVING condition to optimize
10032 @param[out] cond_equal The built multiple equalities
10033 @param join_list list of join operations with join conditions
10034 = NULL: Called for HAVING condition
10035 @param[out] cond_value Not changed if cond was empty
10036 COND_TRUE if cond is always true
10037 COND_FALSE if cond is impossible
10038 COND_OK otherwise
10039
10040 @returns false if success, true if error
10041 */
10042
optimize_cond(THD * thd,Item ** cond,COND_EQUAL ** cond_equal,List<TABLE_LIST> * join_list,Item::cond_result * cond_value)10043 bool optimize_cond(THD *thd, Item **cond, COND_EQUAL **cond_equal,
10044 List<TABLE_LIST> *join_list,
10045 Item::cond_result *cond_value)
10046 {
10047 Opt_trace_context * const trace= &thd->opt_trace;
10048 DBUG_ENTER("optimize_cond");
10049
10050 Opt_trace_object trace_wrapper(trace);
10051 Opt_trace_object trace_cond(trace, "condition_processing");
10052 trace_cond.add_alnum("condition", join_list ? "WHERE" : "HAVING");
10053 trace_cond.add("original_condition", *cond);
10054 Opt_trace_array trace_steps(trace, "steps");
10055
10056 /*
10057 Enter this function
10058 a) For a WHERE condition or a query having outer join.
10059 b) For a HAVING condition.
10060 */
10061 assert(*cond || join_list);
10062
10063 /*
10064 Build all multiple equality predicates and eliminate equality
10065 predicates that can be inferred from these multiple equalities.
10066 For each reference of a field included into a multiple equality
10067 that occurs in a function set a pointer to the multiple equality
10068 predicate. Substitute a constant instead of this field if the
10069 multiple equality contains a constant.
10070 This is performed for the WHERE condition and any join conditions, but
10071 not for the HAVING condition.
10072 */
10073 if (join_list)
10074 {
10075 Opt_trace_object step_wrapper(trace);
10076 step_wrapper.add_alnum("transformation", "equality_propagation");
10077 {
10078 Opt_trace_disable_I_S
10079 disable_trace_wrapper(trace, !(*cond && (*cond)->has_subquery()));
10080 Opt_trace_array
10081 trace_subselect(trace, "subselect_evaluation");
10082 if (build_equal_items(thd, *cond, cond, NULL, true,
10083 join_list, cond_equal))
10084 DBUG_RETURN(true);
10085 }
10086 step_wrapper.add("resulting_condition", *cond);
10087 }
10088 /* change field = field to field = const for each found field = const */
10089 if (*cond)
10090 {
10091 Opt_trace_object step_wrapper(trace);
10092 step_wrapper.add_alnum("transformation", "constant_propagation");
10093 {
10094 Opt_trace_disable_I_S
10095 disable_trace_wrapper(trace, !(*cond)->has_subquery());
10096 Opt_trace_array trace_subselect(trace, "subselect_evaluation");
10097 if (propagate_cond_constants(thd, NULL, *cond, *cond))
10098 DBUG_RETURN(true);
10099 }
10100 step_wrapper.add("resulting_condition", *cond);
10101 }
10102
10103 /*
10104 Remove all instances of item == item
10105 Remove all and-levels where CONST item != CONST item
10106 */
10107 DBUG_EXECUTE("where",print_where(*cond,"after const change", QT_ORDINARY););
10108 if (*cond)
10109 {
10110 Opt_trace_object step_wrapper(trace);
10111 step_wrapper.add_alnum("transformation", "trivial_condition_removal");
10112 {
10113 Opt_trace_disable_I_S
10114 disable_trace_wrapper(trace, !(*cond)->has_subquery());
10115 Opt_trace_array trace_subselect(trace, "subselect_evaluation");
10116 if (remove_eq_conds(thd, *cond, cond, cond_value))
10117 DBUG_RETURN(true);
10118 }
10119 step_wrapper.add("resulting_condition", *cond);
10120 }
10121 assert(!thd->is_error());
10122 if (thd->is_error())
10123 DBUG_RETURN(true);
10124 DBUG_RETURN(false);
10125 }
10126
10127
10128 /**
10129 Handle the recursive job for remove_eq_conds()
10130
10131 @param thd Thread handler
10132 @param cond the condition to handle.
10133 @param[out] retcond Modified condition after removal
10134 @param[out] cond_value the resulting value of the condition
10135
10136 @see remove_eq_conds() for more details on argument
10137
10138 @returns false if success, true if error
10139 */
10140
internal_remove_eq_conds(THD * thd,Item * cond,Item ** retcond,Item::cond_result * cond_value)10141 static bool internal_remove_eq_conds(THD *thd, Item *cond,
10142 Item **retcond,
10143 Item::cond_result *cond_value)
10144 {
10145 if (cond->type() == Item::COND_ITEM)
10146 {
10147 Item_cond *const item_cond= down_cast<Item_cond *>(cond);
10148 const bool and_level= item_cond->functype() == Item_func::COND_AND_FUNC;
10149 List_iterator<Item> li(*item_cond->argument_list());
10150 bool should_fix_fields= false;
10151
10152 *cond_value=Item::COND_UNDEF;
10153 Item *item;
10154 while ((item=li++))
10155 {
10156 Item *new_item;
10157 Item::cond_result tmp_cond_value;
10158 if (internal_remove_eq_conds(thd, item, &new_item, &tmp_cond_value))
10159 return true;
10160
10161 if (new_item == NULL)
10162 li.remove();
10163 else if (item != new_item)
10164 {
10165 (void) li.replace(new_item);
10166 should_fix_fields= true;
10167 }
10168 if (*cond_value == Item::COND_UNDEF)
10169 *cond_value= tmp_cond_value;
10170 switch (tmp_cond_value)
10171 {
10172 case Item::COND_OK: // Not TRUE or FALSE
10173 if (and_level || *cond_value == Item::COND_FALSE)
10174 *cond_value= tmp_cond_value;
10175 break;
10176 case Item::COND_FALSE:
10177 if (and_level) // Always false
10178 {
10179 *cond_value= tmp_cond_value;
10180 *retcond= NULL;
10181 return false;
10182 }
10183 break;
10184 case Item::COND_TRUE:
10185 if (!and_level) // Always true
10186 {
10187 *cond_value= tmp_cond_value;
10188 *retcond= NULL;
10189 return false;
10190 }
10191 break;
10192 case Item::COND_UNDEF: // Impossible
10193 assert(false); /* purecov: deadcode */
10194 }
10195 }
10196 if (should_fix_fields)
10197 item_cond->update_used_tables();
10198
10199 if (item_cond->argument_list()->elements == 0 ||
10200 *cond_value != Item::COND_OK)
10201 {
10202 *retcond= NULL;
10203 return false;
10204 }
10205 if (item_cond->argument_list()->elements == 1)
10206 {
10207 /*
10208 BUG#11765699:
10209 We're dealing with an AND or OR item that has only one
10210 argument. However, it is not an option to empty the list
10211 because:
10212
10213 - this function is called for either JOIN::conds or
10214 JOIN::having, but these point to the same condition as
10215 SELECT_LEX::where and SELECT_LEX::having do.
10216
10217 - The return value of remove_eq_conds() is assigned to
10218 JOIN::conds and JOIN::having, so emptying the list and
10219 returning the only remaining item "replaces" the AND or OR
10220 with item for the variables in JOIN. However, the return
10221 value is not assigned to the SELECT_LEX counterparts. Thus,
10222 if argument_list is emptied, SELECT_LEX forgets the item in
10223 argument_list()->head().
10224
10225 item is therefore returned, but argument_list is not emptied.
10226 */
10227 item= item_cond->argument_list()->head();
10228 /*
10229 Consider reenabling the line below when the optimizer has been
10230 split into properly separated phases.
10231
10232 item_cond->argument_list()->empty();
10233 */
10234 *retcond= item;
10235 return false;
10236 }
10237 }
10238 else if (cond->type() == Item::FUNC_ITEM &&
10239 down_cast<Item_func *>(cond)->functype() == Item_func::ISNULL_FUNC)
10240 {
10241 Item_func_isnull *const func= down_cast<Item_func_isnull *>(cond);
10242 Item **args= func->arguments();
10243 if (args[0]->type() == Item::FIELD_ITEM)
10244 {
10245 Field *const field= down_cast<Item_field *>(args[0])->field;
10246 /* fix to replace 'NULL' dates with '0' (shreeve@uci.edu) */
10247 /*
10248 See BUG#12594011
10249 Documentation says that
10250 SELECT datetime_notnull d FROM t1 WHERE d IS NULL
10251 shall return rows where d=='0000-00-00'
10252
10253 Thus, for DATE and DATETIME columns defined as NOT NULL,
10254 "date_notnull IS NULL" has to be modified to
10255 "date_notnull IS NULL OR date_notnull == 0" (if outer join)
10256 "date_notnull == 0" (otherwise)
10257
10258 */
10259 if (((field->type() == MYSQL_TYPE_DATE) ||
10260 (field->type() == MYSQL_TYPE_DATETIME)) &&
10261 (field->flags & NOT_NULL_FLAG))
10262 {
10263 Item *item0= new(thd->mem_root) Item_int((longlong)0, 1);
10264 if (item0 == NULL)
10265 return true;
10266 Item *eq_cond= new(thd->mem_root) Item_func_eq(args[0], item0);
10267 if (eq_cond == NULL)
10268 return true;
10269
10270 if (args[0]->is_outer_field())
10271 {
10272 // outer join: transform "col IS NULL" to "col IS NULL or col=0"
10273 Item *or_cond= new(thd->mem_root) Item_cond_or(eq_cond, cond);
10274 if (or_cond == NULL)
10275 return true;
10276 cond= or_cond;
10277 }
10278 else
10279 {
10280 // not outer join: transform "col IS NULL" to "col=0"
10281 cond= eq_cond;
10282 }
10283
10284 if (cond->fix_fields(thd, &cond))
10285 return true;
10286 }
10287 }
10288 if (cond->const_item())
10289 {
10290 bool value;
10291 if (eval_const_cond(thd, cond, &value))
10292 return true;
10293 *cond_value= value ? Item::COND_TRUE : Item::COND_FALSE;
10294 *retcond= NULL;
10295 return false;
10296 }
10297 }
10298 else if (cond->const_item() && !cond->is_expensive())
10299 {
10300 bool value;
10301 if (eval_const_cond(thd, cond, &value))
10302 return true;
10303 *cond_value= value ? Item::COND_TRUE : Item::COND_FALSE;
10304 *retcond= NULL;
10305 return false;
10306 }
10307 else
10308 { // boolan compare function
10309 *cond_value= cond->eq_cmp_result();
10310 if (*cond_value == Item::COND_OK)
10311 {
10312 *retcond= cond;
10313 return false;
10314 }
10315 Item *left_item= down_cast<Item_func *>(cond)->arguments()[0];
10316 Item *right_item= down_cast<Item_func *>(cond)->arguments()[1];
10317 if (left_item->eq(right_item,1))
10318 {
10319 if (!left_item->maybe_null ||
10320 down_cast<Item_func *>(cond)->functype() == Item_func::EQUAL_FUNC)
10321 {
10322 *retcond= NULL;
10323 return false; // Compare of identical items
10324 }
10325 }
10326 }
10327 *cond_value= Item::COND_OK;
10328 *retcond= cond; // Point at next and level
10329 return false;
10330 }
10331
10332
10333 /**
10334 Remove const and eq items. Return new item, or NULL if no condition
10335
10336 @param thd thread handler
10337 @param cond the condition to handle
10338 @param[out] retcond condition after const removal
10339 @param[out] cond_value resulting value of the condition
10340 =COND_OK condition must be evaluated (e.g field = constant)
10341 =COND_TRUE always true (e.g 1 = 1)
10342 =COND_FALSE always false (e.g 1 = 2)
10343
10344 @note calls internal_remove_eq_conds() to check the complete tree.
10345
10346 @returns false if success, true if error
10347 */
10348
remove_eq_conds(THD * thd,Item * cond,Item ** retcond,Item::cond_result * cond_value)10349 bool remove_eq_conds(THD *thd, Item *cond, Item **retcond,
10350 Item::cond_result *cond_value)
10351 {
10352 if (cond->type() == Item::FUNC_ITEM &&
10353 down_cast<Item_func *>(cond)->functype() == Item_func::ISNULL_FUNC)
10354 {
10355 /*
10356 Handles this special case for some ODBC applications:
10357 The are requesting the row that was just updated with a auto_increment
10358 value with this construct:
10359
10360 SELECT * from table_name where auto_increment_column IS NULL
10361 This will be changed to:
10362 SELECT * from table_name where auto_increment_column = LAST_INSERT_ID
10363 */
10364
10365 Item_func_isnull *const func= down_cast<Item_func_isnull *>(cond);
10366 Item **args= func->arguments();
10367 if (args[0]->type() == Item::FIELD_ITEM)
10368 {
10369 Field *const field= down_cast<Item_field *>(args[0])->field;
10370 if ((field->flags & AUTO_INCREMENT_FLAG) &&
10371 !field->table->is_nullable() &&
10372 (thd->variables.option_bits & OPTION_AUTO_IS_NULL) &&
10373 (thd->first_successful_insert_id_in_prev_stmt > 0 &&
10374 thd->substitute_null_with_insert_id))
10375 {
10376 query_cache.abort(&thd->query_cache_tls);
10377
10378 cond= new Item_func_eq(
10379 args[0],
10380 new Item_int(NAME_STRING("last_insert_id()"),
10381 thd->read_first_successful_insert_id_in_prev_stmt(),
10382 MY_INT64_NUM_DECIMAL_DIGITS));
10383 if (cond == NULL)
10384 return true;
10385
10386 if (cond->fix_fields(thd, &cond))
10387 return true;
10388
10389 /*
10390 IS NULL should be mapped to LAST_INSERT_ID only for first row, so
10391 clear for next row
10392 */
10393 thd->substitute_null_with_insert_id= FALSE;
10394
10395 *cond_value= Item::COND_OK;
10396 *retcond= cond;
10397 return false;
10398 }
10399 }
10400 }
10401 return internal_remove_eq_conds(thd, cond, retcond, cond_value);
10402 }
10403
10404
10405 /**
10406 Check if GROUP BY/DISTINCT can be optimized away because the set is
10407 already known to be distinct.
10408
10409 Used in removing the GROUP BY/DISTINCT of the following types of
10410 statements:
10411 @code
10412 SELECT [DISTINCT] <unique_key_cols>... FROM <single_table_ref>
10413 [GROUP BY <unique_key_cols>,...]
10414 @endcode
10415
10416 If (a,b,c is distinct)
10417 then <any combination of a,b,c>,{whatever} is also distinct
10418
10419 This function checks if all the key parts of any of the unique keys
10420 of the table are referenced by a list : either the select list
10421 through find_field_in_item_list or GROUP BY list through
10422 find_field_in_order_list.
10423 If the above holds and the key parts cannot contain NULLs then we
10424 can safely remove the GROUP BY/DISTINCT,
10425 as no result set can be more distinct than an unique key.
10426
10427 @param tab The join table to operate on.
10428 @param find_func function to iterate over the list and search
10429 for a field
10430
10431 @retval
10432 1 found
10433 @retval
10434 0 not found.
10435
10436 @note
10437 The function assumes that make_outerjoin_info() has been called in
10438 order for the check for outer tables to work.
10439 */
10440
10441 static bool
list_contains_unique_index(JOIN_TAB * tab,bool (* find_func)(Field *,void *),void * data)10442 list_contains_unique_index(JOIN_TAB *tab,
10443 bool (*find_func) (Field *, void *), void *data)
10444 {
10445 TABLE *table= tab->table();
10446
10447 if (tab->is_inner_table_of_outer_join())
10448 return 0;
10449 for (uint keynr= 0; keynr < table->s->keys; keynr++)
10450 {
10451 if (keynr == table->s->primary_key ||
10452 (table->key_info[keynr].flags & HA_NOSAME))
10453 {
10454 KEY *keyinfo= table->key_info + keynr;
10455 KEY_PART_INFO *key_part, *key_part_end;
10456
10457 for (key_part=keyinfo->key_part,
10458 key_part_end=key_part+ keyinfo->user_defined_key_parts;
10459 key_part < key_part_end;
10460 key_part++)
10461 {
10462 if (key_part->field->real_maybe_null() ||
10463 !find_func(key_part->field, data))
10464 break;
10465 }
10466 if (key_part == key_part_end)
10467 return 1;
10468 }
10469 }
10470 return 0;
10471 }
10472
10473
10474 /**
10475 Helper function for list_contains_unique_index.
10476 Find a field reference in a list of ORDER structures.
10477 Finds a direct reference of the Field in the list.
10478
10479 @param field The field to search for.
10480 @param data ORDER *.The list to search in
10481
10482 @retval
10483 1 found
10484 @retval
10485 0 not found.
10486 */
10487
10488 static bool
find_field_in_order_list(Field * field,void * data)10489 find_field_in_order_list (Field *field, void *data)
10490 {
10491 ORDER *group= (ORDER *) data;
10492 bool part_found= 0;
10493 for (ORDER *tmp_group= group; tmp_group; tmp_group=tmp_group->next)
10494 {
10495 Item *item= (*tmp_group->item)->real_item();
10496 if (item->type() == Item::FIELD_ITEM &&
10497 ((Item_field*) item)->field->eq(field))
10498 {
10499 part_found= 1;
10500 break;
10501 }
10502 }
10503 return part_found;
10504 }
10505
10506
10507 /**
10508 Helper function for list_contains_unique_index.
10509 Find a field reference in a dynamic list of Items.
10510 Finds a direct reference of the Field in the list.
10511
10512 @param[in] field The field to search for.
10513 @param[in] data List<Item> *.The list to search in
10514
10515 @retval
10516 1 found
10517 @retval
10518 0 not found.
10519 */
10520
10521 static bool
find_field_in_item_list(Field * field,void * data)10522 find_field_in_item_list (Field *field, void *data)
10523 {
10524 List<Item> *fields= (List<Item> *) data;
10525 bool part_found= 0;
10526 List_iterator<Item> li(*fields);
10527 Item *item;
10528
10529 while ((item= li++))
10530 {
10531 if (item->type() == Item::FIELD_ITEM &&
10532 ((Item_field*) item)->field->eq(field))
10533 {
10534 part_found= 1;
10535 break;
10536 }
10537 }
10538 return part_found;
10539 }
10540
10541
10542 /**
10543 Create a group by that consist of all non const fields.
10544
10545 Try to use the fields in the order given by 'order' to allow one to
10546 optimize away 'order by'.
10547 */
10548
10549 static ORDER *
create_distinct_group(THD * thd,Ref_ptr_array ref_pointer_array,ORDER * order_list,List<Item> & fields,List<Item> & all_fields,bool * all_order_by_fields_used)10550 create_distinct_group(THD *thd, Ref_ptr_array ref_pointer_array,
10551 ORDER *order_list, List<Item> &fields,
10552 List<Item> &all_fields,
10553 bool *all_order_by_fields_used)
10554 {
10555 List_iterator<Item> li(fields);
10556 Item *item;
10557 ORDER *order,*group,**prev;
10558
10559 *all_order_by_fields_used= 1;
10560 while ((item=li++))
10561 item->marker=0; /* Marker that field is not used */
10562
10563 prev= &group; group=0;
10564 for (order=order_list ; order; order=order->next)
10565 {
10566 if (order->in_field_list)
10567 {
10568 ORDER *ord=(ORDER*) thd->memdup((char*) order,sizeof(ORDER));
10569 if (!ord)
10570 return 0;
10571 *prev=ord;
10572 prev= &ord->next;
10573 (*ord->item)->marker=1;
10574 }
10575 else
10576 *all_order_by_fields_used= 0;
10577 }
10578
10579 li.rewind();
10580 while ((item=li++))
10581 {
10582 if (!item->const_item() && !item->with_sum_func && !item->marker)
10583 {
10584 /*
10585 Don't put duplicate columns from the SELECT list into the
10586 GROUP BY list.
10587 */
10588 ORDER *ord_iter;
10589 for (ord_iter= group; ord_iter; ord_iter= ord_iter->next)
10590 if ((*ord_iter->item)->eq(item, 1))
10591 goto next_item;
10592
10593 ORDER *ord=(ORDER*) thd->mem_calloc(sizeof(ORDER));
10594 if (!ord)
10595 return 0;
10596
10597 if (item->type() == Item::FIELD_ITEM &&
10598 item->field_type() == MYSQL_TYPE_BIT)
10599 {
10600 /*
10601 Because HEAP tables can't index BIT fields we need to use an
10602 additional hidden field for grouping because later it will be
10603 converted to a LONG field. Original field will remain of the
10604 BIT type and will be returned to a client.
10605 @note setup_ref_array() needs to account for the extra space.
10606 */
10607 Item_field *new_item= new Item_field(thd, (Item_field*)item);
10608 ord->item= thd->lex->current_select()->add_hidden_item(new_item);
10609 }
10610 else
10611 {
10612 /*
10613 We have here only field_list (not all_field_list), so we can use
10614 simple indexing of ref_pointer_array (order in the array and in the
10615 list are same)
10616 */
10617 ord->item= &ref_pointer_array[0];
10618 }
10619 ord->direction= ORDER::ORDER_ASC;
10620 *prev=ord;
10621 prev= &ord->next;
10622 }
10623 next_item:
10624 ref_pointer_array.pop_front();
10625 }
10626 *prev=0;
10627 return group;
10628 }
10629
10630
10631 /**
10632 Return table number if there is only one table in sort order
10633 and group and order is compatible, else return 0.
10634 */
10635
10636 static TABLE *
get_sort_by_table(ORDER * a,ORDER * b,TABLE_LIST * tables)10637 get_sort_by_table(ORDER *a,ORDER *b,TABLE_LIST *tables)
10638 {
10639 table_map map= (table_map) 0;
10640 DBUG_ENTER("get_sort_by_table");
10641
10642 if (!a)
10643 a=b; // Only one need to be given
10644 else if (!b)
10645 b=a;
10646
10647 for (; a && b; a=a->next,b=b->next)
10648 {
10649 if (!(*a->item)->eq(*b->item,1))
10650 DBUG_RETURN(0);
10651 map|=a->item[0]->used_tables();
10652 }
10653 map&= ~PARAM_TABLE_BIT;
10654 if (!map || (map & (RAND_TABLE_BIT | OUTER_REF_TABLE_BIT)))
10655 DBUG_RETURN(0);
10656
10657 for (; !(map & tables->map()); tables= tables->next_leaf) ;
10658 if (map != tables->map())
10659 DBUG_RETURN(0); // More than one table
10660 DBUG_PRINT("exit",("sort by table: %d",tables->tableno()));
10661 DBUG_RETURN(tables->table);
10662 }
10663
10664
10665 /**
10666 Create a condition for a const reference for a table.
10667
10668 @param thd THD pointer
10669 @param join_tab pointer to the table
10670
10671 @return A pointer to the created condition for the const reference.
10672 @retval !NULL if the condition was created successfully
10673 @retval NULL if an error has occured
10674 */
10675
create_cond_for_const_ref(THD * thd,JOIN_TAB * join_tab)10676 static Item_cond_and *create_cond_for_const_ref(THD *thd, JOIN_TAB *join_tab)
10677 {
10678 DBUG_ENTER("create_cond_for_const_ref");
10679 assert(join_tab->ref().key_parts);
10680
10681 TABLE *table= join_tab->table();
10682 Item_cond_and *cond= new Item_cond_and();
10683 if (!cond)
10684 DBUG_RETURN(NULL);
10685
10686 for (uint i=0 ; i < join_tab->ref().key_parts ; i++)
10687 {
10688 Field *field= table->field[table->key_info[join_tab->ref().key].key_part[i].
10689 fieldnr-1];
10690 Item *value= join_tab->ref().items[i];
10691 Item *item= new Item_field(field);
10692 if (!item)
10693 DBUG_RETURN(NULL);
10694 item= join_tab->ref().null_rejecting & ((key_part_map)1 << i) ?
10695 (Item *)new Item_func_eq(item, value) :
10696 (Item *)new Item_func_equal(item, value);
10697 if (!item)
10698 DBUG_RETURN(NULL);
10699 if (cond->add(item))
10700 DBUG_RETURN(NULL);
10701 }
10702 cond->fix_fields(thd, (Item**)&cond);
10703
10704 DBUG_RETURN(cond);
10705 }
10706
10707 /**
10708 Create a condition for a const reference and add this to the
10709 currenct select for the table.
10710 */
10711
add_ref_to_table_cond(THD * thd,JOIN_TAB * join_tab)10712 static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab)
10713 {
10714 DBUG_ENTER("add_ref_to_table_cond");
10715 if (!join_tab->ref().key_parts)
10716 DBUG_RETURN(FALSE);
10717
10718 int error= 0;
10719
10720 /* Create a condition representing the const reference. */
10721 Item_cond_and *cond= create_cond_for_const_ref(thd, join_tab);
10722 if (!cond)
10723 DBUG_RETURN(TRUE);
10724
10725 /* Add this condition to the existing select condtion */
10726 if (join_tab->condition())
10727 {
10728 error=(int) cond->add(join_tab->condition());
10729 cond->update_used_tables();
10730 }
10731 join_tab->set_condition(cond);
10732 Opt_trace_object(&thd->opt_trace).add("added_back_ref_condition", cond);
10733
10734 DBUG_RETURN(error ? TRUE : FALSE);
10735 }
10736
10737
10738 /**
10739 Remove additional condition inserted by IN/ALL/ANY transformation.
10740
10741 @param conds condition for processing
10742
10743 @return
10744 new conditions
10745
10746 @note that this function has Bug#13915291.
10747 */
10748
remove_additional_cond(Item * conds)10749 static Item *remove_additional_cond(Item* conds)
10750 {
10751 // Because it uses in_additional_cond it applies only to the scalar case.
10752 if (conds->item_name.ptr() == in_additional_cond)
10753 return 0;
10754 if (conds->type() == Item::COND_ITEM)
10755 {
10756 Item_cond *cnd= (Item_cond*) conds;
10757 List_iterator<Item> li(*(cnd->argument_list()));
10758 Item *item;
10759 while ((item= li++))
10760 {
10761 if (item->item_name.ptr() == in_additional_cond)
10762 {
10763 li.remove();
10764 if (cnd->argument_list()->elements == 1)
10765 return cnd->argument_list()->head();
10766 return conds;
10767 }
10768 }
10769 }
10770 return conds;
10771 }
10772
10773
10774 /**
10775 Update some values in keyuse for faster choose_table_order() loop.
10776
10777 @todo Check if this is the real meaning of ref_table_rows.
10778
10779 @param keyuse_array Array of Key_use elements being updated.
10780
10781
10782 */
10783
optimize_keyuse()10784 void JOIN::optimize_keyuse()
10785 {
10786 for (size_t ix= 0; ix < keyuse_array.size(); ++ix)
10787 {
10788 Key_use *keyuse= &keyuse_array.at(ix);
10789 table_map map;
10790 /*
10791 If we find a ref, assume this table matches a proportional
10792 part of this table.
10793 For example 100 records matching a table with 5000 records
10794 gives 5000/100 = 50 records per key
10795 Constant tables are ignored.
10796 To avoid bad matches, we don't make ref_table_rows less than 100.
10797 */
10798 keyuse->ref_table_rows= ~(ha_rows) 0; // If no ref
10799 if (keyuse->used_tables &
10800 (map= (keyuse->used_tables & ~const_table_map & ~PSEUDO_TABLE_BITS)))
10801 {
10802 uint tableno;
10803 for (tableno= 0; ! (map & 1) ; map>>=1, tableno++)
10804 {}
10805 if (map == 1) // Only one table
10806 {
10807 TABLE *tmp_table= join_tab[tableno].table();
10808
10809 keyuse->ref_table_rows= max<ha_rows>(tmp_table->file->stats.records, 100);
10810 }
10811 }
10812 /*
10813 Outer reference (external field) is constant for single executing
10814 of subquery
10815 */
10816 if (keyuse->used_tables == OUTER_REF_TABLE_BIT)
10817 keyuse->ref_table_rows= 1;
10818 }
10819 }
10820
10821 /**
10822 Function sets FT hints, initializes FT handlers
10823 and checks if FT index can be used as covered.
10824 */
10825
optimize_fts_query()10826 bool JOIN::optimize_fts_query()
10827 {
10828 ASSERT_BEST_REF_IN_JOIN_ORDER(this);
10829
10830 assert(select_lex->has_ft_funcs());
10831
10832 for (uint i= const_tables; i < tables; i++)
10833 {
10834 JOIN_TAB *tab= best_ref[i];
10835 if (tab->type() != JT_FT)
10836 continue;
10837
10838 Item_func_match *ifm;
10839 Item_func_match* ft_func=
10840 static_cast<Item_func_match*>(tab->position()->key->val);
10841 List_iterator<Item_func_match> li(*(select_lex->ftfunc_list));
10842
10843 while ((ifm= li++))
10844 {
10845 if (!(ifm->used_tables() & tab->table_ref->map()) || ifm->master)
10846 continue;
10847
10848 if (ifm != ft_func)
10849 {
10850 if (ifm->can_skip_ranking())
10851 ifm->set_hints(this, FT_NO_RANKING, HA_POS_ERROR, false);
10852 }
10853 }
10854
10855 /*
10856 Check if internal sorting is needed. FT_SORTED flag is set
10857 if no ORDER BY clause or ORDER BY MATCH function is the same
10858 as the function that is used for FT index and FT table is
10859 the first non-constant table in the JOIN.
10860 */
10861 if (i == const_tables &&
10862 !(ft_func->get_hints()->get_flags() & FT_BOOL) &&
10863 (!order || ft_func == test_if_ft_index_order(order)))
10864 ft_func->set_hints(this, FT_SORTED, m_select_limit, false);
10865
10866 /*
10867 Check if ranking is not needed. FT_NO_RANKING flag is set if
10868 MATCH function is used only in WHERE condition and MATCH
10869 function is not part of an expression.
10870 */
10871 if (ft_func->can_skip_ranking())
10872 ft_func->set_hints(this, FT_NO_RANKING,
10873 !order ? m_select_limit : HA_POS_ERROR, false);
10874 }
10875
10876 return init_ftfuncs(thd, select_lex);
10877 }
10878
10879
10880 /**
10881 Check if FTS index only access is possible.
10882
10883 @param tab pointer to JOIN_TAB structure.
10884
10885 @return TRUE if index only access is possible,
10886 FALSE otherwise.
10887 */
10888
fts_index_access(JOIN_TAB * tab)10889 bool JOIN::fts_index_access(JOIN_TAB *tab)
10890 {
10891 assert(tab->type() == JT_FT);
10892 TABLE *table= tab->table();
10893
10894 if ((table->file->ha_table_flags() & HA_CAN_FULLTEXT_EXT) == 0)
10895 return false; // Optimizations requires extended FTS support by table engine
10896
10897 /*
10898 This optimization does not work with filesort nor GROUP BY
10899 */
10900 if (grouped || (order && ordered_index_usage != ordered_index_order_by))
10901 return false;
10902
10903 /*
10904 Check whether the FTS result is covering. If only document id
10905 and rank is needed, there is no need to access table rows.
10906 */
10907 for (uint i= bitmap_get_first_set(table->read_set);
10908 i < table->s->fields;
10909 i= bitmap_get_next_set(table->read_set, i))
10910 {
10911 if (table->field[i] != table->fts_doc_id_field ||
10912 !tab->ft_func()->docid_in_result())
10913 return false;
10914 }
10915
10916 return true;
10917 }
10918
10919
10920 /**
10921 For {semijoin,subquery} materialization: calculates various cost
10922 information, based on a plan in join->best_positions covering the
10923 to-be-materialized query block and only this.
10924
10925 @param join JOIN where plan can be found
10926 @param sj_nest sj materialization nest (NULL if subquery materialization)
10927 @param n_tables number of to-be-materialized tables
10928 @param[out] sjm where computed costs will be stored
10929
10930 @note that this function modifies join->map2table, which has to be filled
10931 correctly later.
10932 */
calculate_materialization_costs(JOIN * join,TABLE_LIST * sj_nest,uint n_tables,Semijoin_mat_optimize * sjm)10933 static void calculate_materialization_costs(JOIN *join,
10934 TABLE_LIST *sj_nest,
10935 uint n_tables,
10936 Semijoin_mat_optimize *sjm)
10937 {
10938 double mat_cost; // Estimated cost of materialization
10939 double mat_rowcount; // Estimated row count before duplicate removal
10940 double distinct_rowcount; // Estimated rowcount after duplicate removal
10941 List<Item> *inner_expr_list;
10942
10943 if (sj_nest)
10944 {
10945 /*
10946 get_partial_join_cost() assumes a regular join, which is correct when
10947 we optimize a sj-materialization nest (always executed as regular
10948 join).
10949 */
10950 get_partial_join_cost(join, n_tables, &mat_cost, &mat_rowcount);
10951 n_tables+= join->const_tables;
10952 inner_expr_list= &sj_nest->nested_join->sj_inner_exprs;
10953 }
10954 else
10955 {
10956 mat_cost= join->best_read;
10957 mat_rowcount= static_cast<double>(join->best_rowcount);
10958 inner_expr_list= &join->select_lex->item_list;
10959 }
10960
10961 /*
10962 Adjust output cardinality estimates. If the subquery has form
10963
10964 ... oe IN (SELECT t1.colX, t2.colY, func(X,Y,Z) )
10965
10966 then the number of distinct output record combinations has an
10967 upper bound of product of number of records matching the tables
10968 that are used by the SELECT clause.
10969 TODO:
10970 We can get a more precise estimate if we
10971 - use rec_per_key cardinality estimates. For simple cases like
10972 "oe IN (SELECT t.key ...)" it is trivial.
10973 - Functional dependencies between the tables in the semi-join
10974 nest (the payoff is probably less here?)
10975 */
10976 {
10977 for (uint i=0 ; i < n_tables ; i++)
10978 {
10979 JOIN_TAB * const tab= join->best_positions[i].table;
10980 join->map2table[tab->table_ref->tableno()]= tab;
10981 }
10982 List_iterator<Item> it(*inner_expr_list);
10983 Item *item;
10984 table_map map= 0;
10985 while ((item= it++))
10986 map|= item->used_tables();
10987 map&= ~PSEUDO_TABLE_BITS;
10988 Table_map_iterator tm_it(map);
10989 int tableno;
10990 double rows= 1.0;
10991 while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
10992 rows*= join->map2table[tableno]->table()->quick_condition_rows;
10993 distinct_rowcount= min(mat_rowcount, rows);
10994 }
10995 /*
10996 Calculate temporary table parameters and usage costs
10997 */
10998 const uint rowlen= get_tmp_table_rec_length(*inner_expr_list);
10999
11000 const Cost_model_server *cost_model= join->cost_model();
11001
11002 Cost_model_server::enum_tmptable_type tmp_table_type;
11003 if (rowlen * distinct_rowcount < join->thd->variables.max_heap_table_size)
11004 tmp_table_type= Cost_model_server::MEMORY_TMPTABLE;
11005 else
11006 tmp_table_type= Cost_model_server::DISK_TMPTABLE;
11007
11008 /*
11009 Let materialization cost include the cost to create the temporary
11010 table and write the rows into it:
11011 */
11012 mat_cost+= cost_model->tmptable_create_cost(tmp_table_type);
11013 mat_cost+= cost_model->tmptable_readwrite_cost(tmp_table_type, mat_rowcount,
11014 0.0);
11015
11016 sjm->materialization_cost.reset();
11017 sjm->materialization_cost.add_io(mat_cost);
11018
11019 sjm->expected_rowcount= distinct_rowcount;
11020
11021 /*
11022 Set the cost to do a full scan of the temptable (will need this to
11023 consider doing sjm-scan):
11024 */
11025 sjm->scan_cost.reset();
11026 if (distinct_rowcount > 0.0)
11027 {
11028 const double scan_cost=
11029 cost_model->tmptable_readwrite_cost(tmp_table_type,
11030 0.0, distinct_rowcount);
11031 sjm->scan_cost.add_io(scan_cost);
11032 }
11033
11034 // The cost to lookup a row in temp. table
11035 const double row_cost= cost_model->tmptable_readwrite_cost(tmp_table_type,
11036 0.0, 1.0);
11037 sjm->lookup_cost.reset();
11038 sjm->lookup_cost.add_io(row_cost);
11039 }
11040
11041
11042 /**
11043 Decides between EXISTS and materialization; performs last steps to set up
11044 the chosen strategy.
11045 @returns 'false' if no error
11046
11047 @note If UNION this is called on each contained JOIN.
11048
11049 */
decide_subquery_strategy()11050 bool JOIN::decide_subquery_strategy()
11051 {
11052 assert(unit->item);
11053
11054 switch (unit->item->substype())
11055 {
11056 case Item_subselect::IN_SUBS:
11057 case Item_subselect::ALL_SUBS:
11058 case Item_subselect::ANY_SUBS:
11059 // All of those are children of Item_in_subselect and may use EXISTS
11060 break;
11061 default:
11062 return false;
11063 }
11064
11065 Item_in_subselect * const in_pred=
11066 static_cast<Item_in_subselect *>(unit->item);
11067
11068 Item_exists_subselect::enum_exec_method chosen_method= in_pred->exec_method;
11069 // Materialization does not allow UNION so this can't happen:
11070 assert(chosen_method != Item_exists_subselect::EXEC_MATERIALIZATION);
11071
11072 if ((chosen_method == Item_exists_subselect::EXEC_EXISTS_OR_MAT) &&
11073 compare_costs_of_subquery_strategies(&chosen_method))
11074 return true;
11075
11076 switch (chosen_method)
11077 {
11078 case Item_exists_subselect::EXEC_EXISTS:
11079 return in_pred->finalize_exists_transform(select_lex);
11080 case Item_exists_subselect::EXEC_MATERIALIZATION:
11081 return in_pred->finalize_materialization_transform(this);
11082 default:
11083 assert(false);
11084 return true;
11085 }
11086 }
11087
11088
11089 /**
11090 Tells what is the cheapest between IN->EXISTS and subquery materialization,
11091 in terms of cost, for the subquery's JOIN.
11092 Input:
11093 - join->{best_positions,best_read,best_rowcount} must contain the
11094 execution plan of EXISTS (where 'join' is the subquery's JOIN)
11095 - join2->{best_positions,best_read,best_rowcount} must be correctly set
11096 (where 'join2' is the parent join, the grandparent join, etc).
11097 Output:
11098 join->{best_positions,best_read,best_rowcount} contain the cheapest
11099 execution plan (where 'join' is the subquery's JOIN).
11100
11101 This plan choice has to happen before calling functions which set up
11102 execution structures, like JOIN::get_best_combination().
11103
11104 @param[out] method chosen method (EXISTS or materialization) will be put
11105 here.
11106 @returns false if success
11107 */
compare_costs_of_subquery_strategies(Item_exists_subselect::enum_exec_method * method)11108 bool JOIN::compare_costs_of_subquery_strategies(
11109 Item_exists_subselect::enum_exec_method *method)
11110 {
11111 *method= Item_exists_subselect::EXEC_EXISTS;
11112
11113 Item_exists_subselect::enum_exec_method allowed_strategies=
11114 select_lex->subquery_strategy(thd);
11115
11116 if (allowed_strategies == Item_exists_subselect::EXEC_EXISTS)
11117 return false;
11118
11119 assert(allowed_strategies == Item_exists_subselect::EXEC_EXISTS_OR_MAT ||
11120 allowed_strategies == Item_exists_subselect::EXEC_MATERIALIZATION);
11121
11122 const JOIN *parent_join= unit->outer_select()->join;
11123 if (!parent_join || !parent_join->child_subquery_can_materialize)
11124 return false;
11125
11126 Item_in_subselect * const in_pred=
11127 static_cast<Item_in_subselect *>(unit->item);
11128
11129 /*
11130 Testing subquery_allows_etc() at each optimization is necessary as each
11131 execution of a prepared statement may use a different type of parameter.
11132 */
11133 if (!subquery_allows_materialization(in_pred, thd, select_lex,
11134 select_lex->outer_select()))
11135 return false;
11136
11137 Opt_trace_context * const trace= &thd->opt_trace;
11138 Opt_trace_object trace_wrapper(trace);
11139 Opt_trace_object
11140 trace_subqmat(trace, "execution_plan_for_potential_materialization");
11141 const double saved_best_read= best_read;
11142 const ha_rows saved_best_rowcount= best_rowcount;
11143 POSITION * const saved_best_pos= best_positions;
11144
11145 if (in_pred->in2exists_added_to_where())
11146 {
11147 Opt_trace_array trace_subqmat_steps(trace, "steps");
11148
11149 // Up to one extra slot per semi-join nest is needed (if materialized)
11150 const uint sj_nests= select_lex->sj_nests.elements;
11151
11152 if (!(best_positions= new (thd->mem_root) POSITION[tables + sj_nests]))
11153 return true;
11154
11155 // Compute plans which do not use outer references
11156
11157 assert(allow_outer_refs);
11158 allow_outer_refs= false;
11159
11160 if (optimize_semijoin_nests_for_materialization(this))
11161 return true;
11162
11163 if (Optimize_table_order(thd, this, NULL).choose_table_order())
11164 return true;
11165 }
11166 else
11167 {
11168 /*
11169 If IN->EXISTS didn't add any condition to WHERE (only to HAVING, which
11170 can happen if subquery has aggregates) then the plan for materialization
11171 will be the same as for EXISTS - don't compute it again.
11172 */
11173 trace_subqmat.add("surely_same_plan_as_EXISTS", true).
11174 add_alnum("cause", "EXISTS_did_not_change_WHERE");
11175 }
11176
11177 Semijoin_mat_optimize sjm;
11178 calculate_materialization_costs(this, NULL, primary_tables, &sjm);
11179
11180 /*
11181 The number of evaluations of the subquery influences costs, we need to
11182 compute it.
11183 */
11184 Opt_trace_object trace_subq_mat_decision(trace, "subq_mat_decision");
11185 Opt_trace_array trace_parents(trace, "parent_fanouts");
11186 const Item_subselect *subs= in_pred;
11187 double subq_executions= 1.0;
11188 for(;;)
11189 {
11190 Opt_trace_object trace_parent(trace);
11191 trace_parent.add_select_number(parent_join->select_lex->select_number);
11192 double parent_fanout;
11193 if (// safety, not sure needed
11194 parent_join->plan_is_const() ||
11195 // if subq is in condition on constant table:
11196 !parent_join->child_subquery_can_materialize)
11197 {
11198 parent_fanout= 1.0;
11199 trace_parent.add("subq_attached_to_const_table", true);
11200 }
11201 else
11202 {
11203 if (subs->in_cond_of_tab != NO_PLAN_IDX)
11204 {
11205 /*
11206 Subquery is attached to a certain 'pos', pos[-1].prefix_rowcount
11207 is the number of times we'll start a loop accessing 'pos'; each such
11208 loop will read pos->rows_fetched rows of 'pos', so subquery will
11209 be evaluated pos[-1].prefix_rowcount * pos->rows_fetched times.
11210 Exceptions:
11211 - if 'pos' is first, use 1.0 instead of pos[-1].prefix_rowcount
11212 - if 'pos' is first of a sj-materialization nest, same.
11213
11214 If in a sj-materialization nest, pos->rows_fetched and
11215 pos[-1].prefix_rowcount are of the "nest materialization" plan
11216 (copied back in fix_semijoin_strategies()), which is
11217 appropriate as it corresponds to evaluations of our subquery.
11218
11219 pos->prefix_rowcount is not suitable because if we have:
11220 select ... from ot1 where ot1.col in
11221 (select it1.col1 from it1 where it1.col2 not in (subq));
11222 and subq does subq-mat, and plan is ot1 - it1+firstmatch(ot1),
11223 then:
11224 - t1.prefix_rowcount==1 (due to firstmatch)
11225 - subq is attached to it1, and is evaluated for each row read from
11226 t1, potentially way more than 1.
11227 */
11228 const uint idx= subs->in_cond_of_tab;
11229 assert((int)idx >= 0 && idx < parent_join->tables);
11230 trace_parent.add("subq_attached_to_table", true);
11231 QEP_TAB *const parent_tab= &parent_join->qep_tab[idx];
11232 trace_parent.add_utf8_table(parent_tab->table_ref);
11233 parent_fanout= parent_tab->position()->rows_fetched;
11234 if ((idx > parent_join->const_tables) &&
11235 !sj_is_materialize_strategy(parent_tab->position()->sj_strategy))
11236 parent_fanout*=
11237 parent_tab[-1].position()->prefix_rowcount;
11238 }
11239 else
11240 {
11241 /*
11242 Subquery is SELECT list, GROUP BY, ORDER BY, HAVING: it is evaluated
11243 at the end of the parent join's execution.
11244 It can be evaluated once per row-before-grouping:
11245 SELECT SUM(t1.col IN (subq)) FROM t1 GROUP BY expr;
11246 or once per row-after-grouping:
11247 SELECT SUM(t1.col) AS s FROM t1 GROUP BY expr HAVING s IN (subq),
11248 SELECT SUM(t1.col) IN (subq) FROM t1 GROUP BY expr
11249 It's hard to tell. We simply assume 'once per
11250 row-before-grouping'.
11251
11252 Another approximation:
11253 SELECT ... HAVING x IN (subq) LIMIT 1
11254 best_rowcount=1 due to LIMIT, though HAVING (and thus the subquery)
11255 may be evaluated many times before HAVING becomes true and the limit
11256 is reached.
11257 */
11258 trace_parent.add("subq_attached_to_join_result", true);
11259 parent_fanout= static_cast<double>(parent_join->best_rowcount);
11260 }
11261 }
11262 subq_executions*= parent_fanout;
11263 trace_parent.add("fanout", parent_fanout);
11264 const bool cacheable= parent_join->select_lex->is_cacheable();
11265 trace_parent.add("cacheable", cacheable);
11266 if (cacheable)
11267 {
11268 // Parent executed only once
11269 break;
11270 }
11271 /*
11272 Parent query is executed once per outer row => go up to find number of
11273 outer rows. Example:
11274 SELECT ... IN(subq-with-in2exists WHERE ... IN (subq-with-mat))
11275 */
11276 if (!(subs= parent_join->unit->item))
11277 {
11278 // derived table, materialized only once
11279 break;
11280 }
11281 parent_join= parent_join->unit->outer_select()->join;
11282 if (!parent_join)
11283 {
11284 /*
11285 May be single-table UPDATE/DELETE, has no join.
11286 @todo we should find how many rows it plans to UPDATE/DELETE, taking
11287 inspiration in Explain_table::explain_rows_and_filtered().
11288 This is not a priority as it applies only to
11289 UPDATE - child(non-mat-subq) - grandchild(may-be-mat-subq).
11290 And it will autosolve the day UPDATE gets a JOIN.
11291 */
11292 break;
11293 }
11294 } // for(;;)
11295 trace_parents.end();
11296
11297 const double cost_exists= subq_executions * saved_best_read;
11298 const double cost_mat_table= sjm.materialization_cost.total_cost();
11299 const double cost_mat= cost_mat_table + subq_executions *
11300 sjm.lookup_cost.total_cost();
11301 const bool mat_chosen=
11302 (allowed_strategies == Item_exists_subselect::EXEC_EXISTS_OR_MAT) ?
11303 (cost_mat < cost_exists) : true;
11304 trace_subq_mat_decision
11305 .add("cost_to_create_and_fill_materialized_table",
11306 cost_mat_table)
11307 .add("cost_of_one_EXISTS", saved_best_read)
11308 .add("number_of_subquery_evaluations", subq_executions)
11309 .add("cost_of_materialization", cost_mat)
11310 .add("cost_of_EXISTS", cost_exists)
11311 .add("chosen", mat_chosen);
11312 if (mat_chosen)
11313 *method= Item_exists_subselect::EXEC_MATERIALIZATION;
11314 else
11315 {
11316 best_read= saved_best_read;
11317 best_rowcount= saved_best_rowcount;
11318 best_positions= saved_best_pos;
11319 /*
11320 Don't restore JOIN::positions or best_ref, they're not used
11321 afterwards. best_positions is (like: by get_sj_strategy()).
11322 */
11323 }
11324 return false;
11325 }
11326
11327
11328 /**
11329 Optimize rollup specification.
11330
11331 Allocate objects needed for rollup processing.
11332
11333 @returns false if success, true if error.
11334 */
11335
optimize_rollup()11336 bool JOIN::optimize_rollup()
11337 {
11338 tmp_table_param.quick_group= 0; // Can't create groups in tmp table
11339 rollup.state= ROLLUP::STATE_INITED;
11340
11341 /*
11342 Create pointers to the different sum function groups
11343 These are updated by rollup_make_fields()
11344 */
11345 tmp_table_param.group_parts= send_group_parts;
11346 /*
11347 substitute_gc() might substitute an expression in the GROUP BY list with
11348 a generated column. In such case the GC is added to the all_fields as a
11349 hidden field. In total, all_fields list could be grown by up to
11350 send_group_parts columns. Reserve space for them here.
11351 */
11352 const uint ref_array_size= all_fields.elements + send_group_parts;
11353
11354 Item_null_result **null_items=
11355 static_cast<Item_null_result**>(thd->alloc(sizeof(Item*)*send_group_parts));
11356
11357 rollup.null_items= Item_null_array(null_items, send_group_parts);
11358 rollup.ref_pointer_arrays=
11359 static_cast<Ref_ptr_array*>
11360 (thd->alloc((sizeof(Ref_ptr_array) +
11361 ref_array_size * sizeof(Item*)) * send_group_parts));
11362 rollup.fields=
11363 static_cast<List<Item>*>(thd->alloc(sizeof(List<Item>) * send_group_parts));
11364
11365 if (!null_items || !rollup.ref_pointer_arrays || !rollup.fields)
11366 return true;
11367
11368 Item **ref_array= (Item**) (rollup.ref_pointer_arrays+send_group_parts);
11369
11370 /*
11371 Prepare space for field list for the different levels
11372 These will be filled up in rollup_make_fields()
11373 */
11374 ORDER *group= group_list;
11375 for (uint i= 0; i < send_group_parts; i++, group= group->next)
11376 {
11377 rollup.null_items[i]=
11378 new (thd->mem_root) Item_null_result((*group->item)->field_type(),
11379 (*group->item)->result_type());
11380 if (rollup.null_items[i] == NULL)
11381 return true; /* purecov: inspected */
11382 List<Item> *rollup_fields= &rollup.fields[i];
11383 rollup_fields->empty();
11384 rollup.ref_pointer_arrays[i]= Ref_ptr_array(ref_array, ref_array_size);
11385 ref_array+= ref_array_size;
11386 }
11387 for (uint i= 0; i < send_group_parts; i++)
11388 {
11389 for (uint j= 0; j < fields_list.elements; j++)
11390 rollup.fields[i].push_back(rollup.null_items[i]);
11391 }
11392 return false;
11393 }
11394
11395
11396 /**
11397 Refine the best_rowcount estimation based on what happens after tables
11398 have been joined: LIMIT and type of result sink.
11399 */
refine_best_rowcount()11400 void JOIN::refine_best_rowcount()
11401 {
11402 // If plan is const, 0 or 1 rows should be returned
11403 assert(!plan_is_const() || best_rowcount <= 1);
11404
11405 if (plan_is_const())
11406 return;
11407
11408 /*
11409 If a derived table, or a member of a UNION which itself forms a derived
11410 table:
11411 setting estimate to 0 or 1 row would mark the derived table as const.
11412 The row count is bumped to the nearest higher value, so that the
11413 query block will not be evaluated during optimization.
11414 */
11415 if (best_rowcount <= 1 &&
11416 select_lex->master_unit()->first_select()->linkage ==
11417 DERIVED_TABLE_TYPE)
11418 best_rowcount= 2;
11419
11420 /*
11421 There will be no more rows than defined in the LIMIT clause. Use it
11422 as an estimate. If LIMIT 1 is specified, the query block will be
11423 considered "const", with actual row count 0 or 1.
11424 */
11425 set_if_smaller(best_rowcount, unit->select_limit_cnt);
11426 }
11427
11428 /**
11429 @} (end of group Query_Optimizer)
11430 */
11431
11432 /**
11433 This function is used to get the key length of Item object on
11434 which one tmp field will be created during create_tmp_table.
11435 This function references KEY_PART_INFO::init_from_field().
11436
11437 @param item A inner item of outer join
11438
11439 @return The length of a item to be as a key of a temp table
11440 */
11441
get_key_length_tmp_table(Item * item)11442 static uint32 get_key_length_tmp_table(Item *item)
11443 {
11444 uint32 len= 0;
11445
11446 item= item->real_item();
11447 if (item->type() == Item::FIELD_ITEM)
11448 len= ((Item_field *)item)->field->key_length();
11449 else
11450 len= item->max_length;
11451
11452 if (item->maybe_null)
11453 len+= HA_KEY_NULL_LENGTH;
11454
11455 // references KEY_PART_INFO::init_from_field()
11456 enum_field_types type= item->field_type();
11457 if (type == MYSQL_TYPE_BLOB ||
11458 type == MYSQL_TYPE_VARCHAR ||
11459 type == MYSQL_TYPE_GEOMETRY)
11460 len+= HA_KEY_BLOB_LENGTH;
11461
11462 return len;
11463 }
11464
11465