1 /* Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23 /**
24 @file
25
26 @brief
27 mysql_select and join optimization
28
29
30 @defgroup Query_Optimizer Query Optimizer
31 @{
32 */
33
34 #include "sql_select.h"
35 #include "sql_optimizer.h"
36 #include "sql_resolver.h" // subquery_allows_materialization
37 #include "sql_executor.h"
38 #include "sql_planner.h"
39 #include "debug_sync.h" // DEBUG_SYNC
40 #include "opt_trace.h"
41 #include "sql_derived.h"
42 #include "sql_test.h"
43 #include "sql_base.h"
44 #include "sql_parse.h"
45 #include "my_bit.h"
46 #include "lock.h"
47 #include "abstract_query_plan.h"
48 #include "opt_explain_format.h" // Explain_format_flags
49
50 #include <algorithm>
51 using std::max;
52 using std::min;
53
// Forward declarations of the file-local (static) helpers used by the
// optimizer code in this file. The notes below describe only how
// JOIN::optimize() uses the helpers it calls directly; the definitions
// themselves appear elsewhere in the file.

// Called by JOIN::optimize() to calculate how to do the join; a true return
// is treated as an error there.
54 static bool make_join_statistics(JOIN *join, TABLE_LIST *leaves, Item *conds,
55 Key_use_array *keyuse,
56 bool first_optimization);
57 static bool optimize_semijoin_nests_for_materialization(JOIN *join);
58 static void calculate_materialization_costs(JOIN *join, TABLE_LIST *sj_nest,
59 uint n_tables,
60 Semijoin_mat_optimize *sjm);
// Invoked by JOIN::optimize() only when the join has outer joins, right after
// reset_nj_counters().
61 static void make_outerjoin_info(JOIN *join);
// A true return makes JOIN::optimize() stop with the zero-result cause
// "Impossible WHERE noticed after reading const tables".
62 static bool make_join_select(JOIN *join, Item *item);
// JOIN::optimize() passes one of the two find_field_in_*_list callbacks below
// as find_func: find_field_in_order_list with the GROUP BY list as data, and
// find_field_in_item_list with the address of the select field list as data.
63 static bool list_contains_unique_index(JOIN_TAB *tab,
64 bool (*find_func) (Field *, void *), void *data);
65 static bool find_field_in_item_list (Field *field, void *data);
66 static bool find_field_in_order_list (Field *field, void *data);
// Used by JOIN::optimize() when turning DISTINCT into GROUP BY. A NULL result
// combined with thd->is_fatal_error is handled there as an out-of-memory error.
67 static ORDER *create_distinct_group(THD *thd, Ref_ptr_array ref_pointer_array,
68 ORDER *order, List<Item> &fields,
69 List<Item> &all_fields,
70 bool *all_order_by_fields_used);
// JOIN::optimize() calls this with (order, group_list, leaf tables) and stores
// the result in sort_by_table.
71 static TABLE *get_sort_by_table(ORDER *a,ORDER *b,TABLE_LIST *tables);
// A true return is treated as an error by JOIN::optimize().
72 static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab);
// Applied to the WHERE condition before JOIN::optimize() builds the index
// subquery engine for the JT_REF_OR_NULL case.
73 static Item *remove_additional_cond(Item* conds);
// Called on first optimization to convert outer joins to inner joins where
// possible; the resulting condition is returned through new_conds and a true
// return is treated as an error.
74 static bool simplify_joins(JOIN *join, List<TABLE_LIST> *join_list,
75 Item *conds, bool top, bool in_sj,
76 Item **new_conds,
77 uint *changelog= NULL);
// Called right after simplify_joins(); a true return is treated as an error.
78 static bool record_join_nest_info(st_select_lex *select,
79 List<TABLE_LIST> *tables);
// JOIN::optimize() calls this with first_unused == 0 and ignores the result.
80 static uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list,
81 uint first_unused);
// Applied by JOIN::optimize() to both the ORDER BY and the GROUP BY list;
// clause_type receives "ORDER BY" or "GROUP BY" respectively.
82 static ORDER *remove_const(JOIN *join,ORDER *first_order, Item *cond,
83 bool change_list, bool *simple_order,
84 const char *clause_type);
// Called just before a table's access type is switched to
// JT_UNIQUE_SUBQUERY / JT_INDEX_SUBQUERY in JOIN::optimize().
85 static void save_index_subquery_explain_info(JOIN_TAB *join_tab, Item* where);
86 static void trace_table_dependencies(Opt_trace_context * trace,
87 JOIN_TAB *join_tabs,
88 uint table_count);
89 static bool
90 update_ref_and_keys(THD *thd, Key_use_array *keyuse,JOIN_TAB *join_tab,
91 uint tables, Item *cond, COND_EQUAL *cond_equal,
92 table_map normal_tables, SELECT_LEX *select_lex,
93 SARGABLE_PARAM **sargables);
94 static bool pull_out_semijoin_tables(JOIN *join);
95 static void set_position(JOIN *join, uint idx, JOIN_TAB *table, Key_use *key);
96 static void add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab);
97 static ha_rows get_quick_record_count(THD *thd, SQL_SELECT *select,
98 TABLE *table,
99 const key_map *keys,ha_rows limit);
100 static void optimize_keyuse(JOIN *join, Key_use_array *keyuse_array);
101 static Item *
102 make_cond_for_table_from_pred(Item *root_cond, Item *cond,
103 table_map tables, table_map used_table,
104 bool exclude_expensive_cond);
105 static bool
106 only_eq_ref_tables(JOIN *join, ORDER *order, table_map tables,
107 table_map *cached_eq_ref_tables, table_map
108 *eq_ref_tables);
109
110 static bool can_switch_from_ref_to_range(THD *thd, JOIN_TAB *tab);
111
112 /**
113 global select optimisation.
114
115 @note
116 error code saved in field 'error'
117
118 @retval
119 0 success
120 @retval
121 1 error
122 */
123
124 int
optimize()125 JOIN::optimize()
126 {
127 ulonglong select_opts_for_readinfo;
128 uint no_jbuf_after= UINT_MAX;
129
130 DBUG_ENTER("JOIN::optimize");
131 DBUG_ASSERT(!tables || thd->lex->is_query_tables_locked());
132
133 // to prevent double initialization on EXPLAIN
134 if (optimized)
135 DBUG_RETURN(0);
136
137 // We may do transformations (like semi-join):
138 Prepare_error_tracker tracker(thd);
139
140 optimized= true;
141 const bool first_optimization= select_lex->first_cond_optimization;
142 select_lex->first_cond_optimization= false;
143
144 DEBUG_SYNC(thd, "before_join_optimize");
145
146 THD_STAGE_INFO(thd, stage_optimizing);
147
148 Opt_trace_context * const trace= &thd->opt_trace;
149 Opt_trace_object trace_wrapper(trace);
150 Opt_trace_object trace_optimize(trace, "join_optimization");
151 trace_optimize.add_select_number(select_lex->select_number);
152 Opt_trace_array trace_steps(trace, "steps");
153
154 // Needed in case optimizer short-cuts, set properly in make_tmp_tables_info()
155 fields= &select_lex->item_list;
156
157 /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
158 if (flatten_subqueries())
159 DBUG_RETURN(1); /* purecov: inspected */
160
161 /*
162 Run optimize phase for all derived tables/views used in this SELECT,
163 including those in semi-joins.
164 */
165 if (select_lex->handle_derived(thd->lex, &mysql_derived_optimize))
166 DBUG_RETURN(1);
167
168 /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
169
170 row_limit= ((select_distinct || order || group_list) ? HA_POS_ERROR :
171 unit->select_limit_cnt);
172 // m_select_limit is used to decide if we are likely to scan the whole table.
173 m_select_limit= unit->select_limit_cnt;
174 if (having || (select_options & OPTION_FOUND_ROWS))
175 m_select_limit= HA_POS_ERROR;
176 do_send_rows = (unit->select_limit_cnt > 0) ? 1 : 0;
177
178 #ifdef HAVE_REF_TO_FIELDS // Not done yet
179 /* Add HAVING to WHERE if possible */
180 if (having && !group_list && !sum_func_count)
181 {
182 if (!conds)
183 {
184 conds= having;
185 having= 0;
186 }
187 else if ((conds=new Item_cond_and(conds,having)))
188 {
189 /*
190 Item_cond_and can't be fixed after creation, so we do not check
191 conds->fixed
192 */
193 conds->fix_fields(thd, &conds);
194 conds->change_ref_to_fields(thd, tables_list);
195 conds->top_level_item();
196 having= 0;
197 }
198 }
199 #endif
200 if (first_optimization)
201 {
202 /*
203 These are permanent transformations, so new items must be
204 allocated in the statement mem root
205 */
206 Prepared_stmt_arena_holder ps_arena_holder(thd);
207
208 /* Convert all outer joins to inner joins if possible */
209 if (simplify_joins(this, join_list, conds, true, false, &conds))
210 {
211 DBUG_PRINT("error",("Error from simplify_joins"));
212 DBUG_RETURN(1);
213 }
214 if (record_join_nest_info(select_lex, join_list))
215 {
216 DBUG_PRINT("error",("Error from record_join_nest_info"));
217 DBUG_RETURN(1);
218 }
219 build_bitmap_for_nested_joins(join_list, 0);
220
221 /*
222 After permanent transformations above, prep_where created in
223 st_select_lex::fix_prepare_information() is out-of-date, we need to
224 refresh it.
225 For that we must copy "conds" because it contains AND/OR items in a
226 non-permanent memroot. And this copy must contain real items only,
227 because the new AND/OR items will not have their argument pointers
228 restored by rollback_item_tree_changes().
229 @see st_select_lex::fix_prepare_information() for problems with this.
230 @todo in WL#7082 move transformations above to before
231 st_select_lex::fix_prepare_information(), and remove this second copy
232 below.
233 */
234 select_lex->prep_where=
235 conds ? conds->copy_andor_structure(thd, true): NULL;
236 if (conds)
237 thd->change_item_tree_place(&conds, &select_lex->prep_where);
238 }
239
240 /*
241 Note: optimize_cond() makes changes to conds. Since
242 select_lex->where and conds points to the same condition, this
243 function call effectively changes select_lex->where as well.
244 */
245 conds= optimize_cond(thd, conds, &cond_equal,
246 join_list, true, &select_lex->cond_value);
247 if (thd->is_error())
248 {
249 error= 1;
250 DBUG_PRINT("error",("Error from optimize_cond"));
251 DBUG_RETURN(1);
252 }
253
254 {
255 // The note above about optimize_cond() also applies to select_lex->having
256 having= optimize_cond(thd, having, &cond_equal, join_list, false,
257 &select_lex->having_value);
258 if (thd->is_error())
259 {
260 error= 1;
261 DBUG_PRINT("error",("Error from optimize_cond"));
262 DBUG_RETURN(1);
263 }
264 if (select_lex->cond_value == Item::COND_FALSE ||
265 select_lex->having_value == Item::COND_FALSE ||
266 (!unit->select_limit_cnt && !(select_options & OPTION_FOUND_ROWS)))
267 { /* Impossible cond */
268 zero_result_cause= select_lex->having_value == Item::COND_FALSE ?
269 "Impossible HAVING" : "Impossible WHERE";
270 tables= 0;
271 primary_tables= 0;
272 best_rowcount= 0;
273 goto setup_subq_exit;
274 }
275 }
276
277 #ifdef WITH_PARTITION_STORAGE_ENGINE
278 if (select_lex->partitioned_table_count && prune_table_partitions(thd))
279 {
280 error= 1;
281 DBUG_PRINT("error", ("Error from prune_partitions"));
282 DBUG_RETURN(1);
283 }
284 #endif
285
286 optimize_fts_limit_query();
287
288 /*
289 Try to optimize count(*), min() and max() to const fields if
290 there is implicit grouping (aggregate functions but no
291 group_list). In this case, the result set shall only contain one
292 row.
293 */
294 if (tables_list && implicit_grouping)
295 {
296 int res;
297 /*
298 opt_sum_query() returns HA_ERR_KEY_NOT_FOUND if no rows match
299 to the WHERE conditions,
300 or 1 if all items were resolved (optimized away),
301 or 0, or an error number HA_ERR_...
302
303 If all items were resolved by opt_sum_query, there is no need to
304 open any tables.
305 */
306 if ((res=opt_sum_query(thd, select_lex->leaf_tables, all_fields, conds)))
307 {
308 best_rowcount= 0;
309 if (res == HA_ERR_KEY_NOT_FOUND)
310 {
311 DBUG_PRINT("info",("No matching min/max row"));
312 zero_result_cause= "No matching min/max row";
313 tables= 0;
314 primary_tables= 0;
315 goto setup_subq_exit;
316 }
317 if (res > 1)
318 {
319 error= res;
320 DBUG_PRINT("error",("Error from opt_sum_query"));
321 DBUG_RETURN(1);
322 }
323 if (res < 0)
324 {
325 DBUG_PRINT("info",("No matching min/max row"));
326 zero_result_cause= "No matching min/max row";
327 tables= 0;
328 primary_tables= 0;
329 goto setup_subq_exit;
330 }
331 DBUG_PRINT("info",("Select tables optimized away"));
332 zero_result_cause= "Select tables optimized away";
333 tables_list= 0; // All tables resolved
334 best_rowcount= 1;
335 const_tables= primary_tables;
336 /*
337 Extract all table-independent conditions and replace the WHERE
338 clause with them. All other conditions were computed by opt_sum_query
339 and the MIN/MAX/COUNT function(s) have been replaced by constants,
340 so there is no need to compute the whole WHERE clause again.
341 Notice that make_cond_for_table() will always succeed to remove all
342 computed conditions, because opt_sum_query() is applicable only to
343 conjunctions.
344 Preserve conditions for EXPLAIN.
345 */
346 if (conds && !(thd->lex->describe & DESCRIBE_EXTENDED))
347 {
348 Item *table_independent_conds=
349 make_cond_for_table(conds, PSEUDO_TABLE_BITS, 0, 0);
350 DBUG_EXECUTE("where",
351 print_where(table_independent_conds,
352 "where after opt_sum_query()",
353 QT_ORDINARY););
354 conds= table_independent_conds;
355 }
356 goto setup_subq_exit;
357 }
358 }
359 if (!tables_list)
360 {
361 DBUG_PRINT("info",("No tables"));
362 best_rowcount= 1;
363 error= 0;
364 if (make_tmp_tables_info())
365 DBUG_RETURN(1);
366 DBUG_RETURN(0);
367 }
368 error= -1; // Error is sent to client
369 sort_by_table= get_sort_by_table(order, group_list, select_lex->leaf_tables);
370
371 /* Calculate how to do the join */
372 THD_STAGE_INFO(thd, stage_statistics);
373 if (make_join_statistics(this, select_lex->leaf_tables, conds, &keyuse,
374 first_optimization))
375 {
376 DBUG_PRINT("error",("Error: make_join_statistics() failed"));
377 DBUG_RETURN(1);
378 }
379
380 if (rollup.state != ROLLUP::STATE_NONE)
381 {
382 if (rollup_process_const_fields())
383 {
384 DBUG_PRINT("error", ("Error: rollup_process_fields() failed"));
385 DBUG_RETURN(1);
386 }
387 }
388 else
389 {
390 /* Remove distinct if only const tables */
391 select_distinct&= !plan_is_const();
392 }
393
394 if (const_table_map != found_const_table_map &&
395 !(select_options & SELECT_DESCRIBE))
396 {
397 // There is at least one empty const table
398 zero_result_cause= "no matching row in const table";
399 DBUG_PRINT("error",("Error: %s", zero_result_cause));
400 goto setup_subq_exit;
401 }
402 if (!(thd->variables.option_bits & OPTION_BIG_SELECTS) &&
403 best_read > (double) thd->variables.max_join_size &&
404 !(select_options & SELECT_DESCRIBE))
405 { /* purecov: inspected */
406 my_message(ER_TOO_BIG_SELECT, ER(ER_TOO_BIG_SELECT), MYF(0));
407 error= -1;
408 DBUG_RETURN(1);
409 }
410 if (const_tables && !thd->locked_tables_mode &&
411 !(select_options & SELECT_NO_UNLOCK))
412 {
413 TABLE *ct[MAX_TABLES];
414 for (uint i= 0; i < const_tables; i++)
415 ct[i]= join_tab[i].table;
416 mysql_unlock_some_tables(thd, ct, const_tables);
417 }
418 if (!conds && outer_join)
419 {
420 /* Handle the case where we have an OUTER JOIN without a WHERE */
421 conds=new Item_int((longlong) 1,1); // Always true
422 }
423
424 error= 0;
425 if (outer_join)
426 {
427 reset_nj_counters(join_list);
428 make_outerjoin_info(this);
429 }
430 // Assign map of "available" tables to all tables belonging to query block
431 if (!plan_is_const())
432 set_prefix_tables();
433
434 /*
435 Among the equal fields belonging to the same multiple equality
436 choose the one that is to be retrieved first and substitute
437 all references to these in where condition for a reference for
438 the selected field.
439 */
440 if (conds)
441 {
442 conds= substitute_for_best_equal_field(conds, cond_equal, map2table);
443 if (thd->is_error())
444 {
445 error= 1;
446 DBUG_PRINT("error",("Error from substitute_for_best_equal"));
447 DBUG_RETURN(1);
448 }
449 conds->update_used_tables();
450 DBUG_EXECUTE("where",
451 print_where(conds,
452 "after substitute_best_equal",
453 QT_ORDINARY););
454 }
455
456 /*
457 Perform the same optimization on field evaluation for all join conditions.
458 */
459 for (JOIN_TAB *tab= join_tab + const_tables; tab < join_tab + tables ; tab++)
460 {
461 if (tab->on_expr_ref && *tab->on_expr_ref)
462 {
463 *tab->on_expr_ref= substitute_for_best_equal_field(*tab->on_expr_ref,
464 tab->cond_equal,
465 map2table);
466 if (thd->is_error())
467 {
468 error= 1;
469 DBUG_PRINT("error",("Error from substitute_for_best_equal"));
470 DBUG_RETURN(1);
471 }
472 (*tab->on_expr_ref)->update_used_tables();
473 }
474 }
475
476 if (conds && const_table_map != found_const_table_map &&
477 (select_options & SELECT_DESCRIBE))
478 {
479 conds=new Item_int((longlong) 0,1); // Always false
480 }
481
482 if (select_lex->materialized_table_count)
483 drop_unused_derived_keys();
484
485 if (set_access_methods())
486 {
487 error= 1;
488 DBUG_PRINT("error",("Error from set_access_methods"));
489 DBUG_RETURN(1);
490 }
491
492 // Update table dependencies after assigning ref access fields
493 update_depend_map(this);
494
495 THD_STAGE_INFO(thd, stage_preparing);
496 if (result->initialize_tables(this))
497 {
498 DBUG_PRINT("error",("Error: initialize_tables() failed"));
499 DBUG_RETURN(1); // error == -1
500 }
501
502 if (make_join_select(this, conds))
503 {
504 zero_result_cause=
505 "Impossible WHERE noticed after reading const tables";
506 goto setup_subq_exit;
507 }
508
509 error= -1; /* if goto err */
510
511 /* Optimize distinct away if possible */
512 {
513 ORDER *org_order= order;
514 order= ORDER_with_src(remove_const(this, order, conds, 1, &simple_order, "ORDER BY"), order.src);;
515 if (thd->is_error())
516 {
517 error= 1;
518 DBUG_PRINT("error",("Error from remove_const"));
519 DBUG_RETURN(1);
520 }
521
522 /*
523 If we are using ORDER BY NULL or ORDER BY const_expression,
524 return result in any order (even if we are using a GROUP BY)
525 */
526 if (!order && org_order)
527 skip_sort_order= 1;
528 }
529 /*
530 Check if we can optimize away GROUP BY/DISTINCT.
531 We can do that if there are no aggregate functions, the
532 fields in DISTINCT clause (if present) and/or columns in GROUP BY
533 (if present) contain direct references to all key parts of
534 an unique index (in whatever order) and if the key parts of the
535 unique index cannot contain NULLs.
536 Note that the unique keys for DISTINCT and GROUP BY need not
537 be the same (as long as they are unique).
538
539 The FROM clause must contain a single non-constant table.
540 */
541 if (plan_is_single_table() &&
542 (group_list || select_distinct) &&
543 !tmp_table_param.sum_func_count &&
544 (!join_tab[const_tables].select ||
545 !join_tab[const_tables].select->quick ||
546 join_tab[const_tables].select->quick->get_type() !=
547 QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX))
548 {
549 if (group_list && rollup.state == ROLLUP::STATE_NONE &&
550 list_contains_unique_index(&join_tab[const_tables],
551 find_field_in_order_list,
552 (void *) group_list))
553 {
554 /*
555 We have found that grouping can be removed since groups correspond to
556 only one row anyway, but we still have to guarantee correct result
557 order. The line below effectively rewrites the query from GROUP BY
558 <fields> to ORDER BY <fields>. There are three exceptions:
559 - if skip_sort_order is set (see above), then we can simply skip
560 GROUP BY;
561 - if we are in a subquery, we don't have to maintain order
562 - we can only rewrite ORDER BY if the ORDER BY fields are 'compatible'
563 with the GROUP BY ones, i.e. either one is a prefix of another.
564 We only check if the ORDER BY is a prefix of GROUP BY. In this case
565 test_if_subpart() copies the ASC/DESC attributes from the original
566 ORDER BY fields.
567 If GROUP BY is a prefix of ORDER BY, then it is safe to leave
568 'order' as is.
569 */
570 if (!order || test_if_subpart(group_list, order))
571 {
572 if (skip_sort_order ||
573 select_lex->master_unit()->item) // This is a subquery
574 order= NULL;
575 else
576 order= group_list;
577 }
578 /*
579 If we have an IGNORE INDEX FOR GROUP BY(fields) clause, this must be
580 rewritten to IGNORE INDEX FOR ORDER BY(fields).
581 */
582 join_tab->table->keys_in_use_for_order_by=
583 join_tab->table->keys_in_use_for_group_by;
584 group_list= 0;
585 group= 0;
586 }
587 if (select_distinct &&
588 list_contains_unique_index(&join_tab[const_tables],
589 find_field_in_item_list,
590 (void *) &fields_list))
591 {
592 select_distinct= 0;
593 }
594 }
595 if (group_list || tmp_table_param.sum_func_count)
596 {
597 if (hidden_group_field_count == 0 && rollup.state == ROLLUP::STATE_NONE)
598 {
599 /*
600 All GROUP expressions are in SELECT list, so resulting rows are
601 distinct. ROLLUP is not specified, so adds no row. So all rows in the
602 result set are distinct, DISTINCT is useless.
603 @todo could remove DISTINCT if ROLLUP were specified and all GROUP
604 expressions were non-nullable, because ROLLUP adds only NULL
605 values. Currently, ROLLUP+DISTINCT is rejected because executor
606 cannot handle it in all cases.
607 */
608 select_distinct= false;
609 }
610 }
611 else if (select_distinct &&
612 plan_is_single_table() &&
613 rollup.state == ROLLUP::STATE_NONE)
614 {
615 /*
616 We are only using one table. In this case we change DISTINCT to a
617 GROUP BY query if:
618 - The GROUP BY can be done through indexes (no sort) and the ORDER
619 BY only uses selected fields.
620 (In this case we can later optimize away GROUP BY and ORDER BY)
621 - We are scanning the whole table without LIMIT
622 This can happen if:
623 - We are using CALC_FOUND_ROWS
624 - We are using an ORDER BY that can't be optimized away.
625
626 We don't want to use this optimization when we are using LIMIT
627 because in this case we can just create a temporary table that
628 holds LIMIT rows and stop when this table is full.
629 */
630 JOIN_TAB *tab= &join_tab[const_tables];
631 bool all_order_fields_used;
632 if (order)
633 {
634 skip_sort_order=
635 test_if_skip_sort_order(tab, order, m_select_limit,
636 true, // no_changes
637 &tab->table->keys_in_use_for_order_by,
638 "ORDER BY");
639 }
640 ORDER *o;
641 if ((o= create_distinct_group(thd, ref_ptrs,
642 order, fields_list, all_fields,
643 &all_order_fields_used)))
644 {
645 group_list= ORDER_with_src(o, ESC_DISTINCT);
646 const bool skip_group=
647 skip_sort_order &&
648 test_if_skip_sort_order(tab, group_list, m_select_limit,
649 true, // no_changes
650 &tab->table->keys_in_use_for_group_by,
651 "GROUP BY");
652 count_field_types(select_lex, &tmp_table_param, all_fields, 0);
653 if ((skip_group && all_order_fields_used) ||
654 m_select_limit == HA_POS_ERROR ||
655 (order && !skip_sort_order))
656 {
657 /* Change DISTINCT to GROUP BY */
658 select_distinct= 0;
659 no_order= !order;
660 if (all_order_fields_used)
661 {
662 if (order && skip_sort_order)
663 {
664 /*
665 Force MySQL to read the table in sorted order to get result in
666 ORDER BY order.
667 */
668 tmp_table_param.quick_group=0;
669 }
670 order=0;
671 }
672 group=1; // For end_write_group
673 }
674 else
675 group_list= 0;
676 }
677 else if (thd->is_fatal_error) // End of memory
678 DBUG_RETURN(1);
679 }
680 simple_group= 0;
681 {
682 ORDER *old_group_list= group_list;
683 group_list= ORDER_with_src(remove_const(this, group_list, conds,
684 rollup.state == ROLLUP::STATE_NONE,
685 &simple_group, "GROUP BY"),
686 group_list.src);
687
688 if (thd->is_error())
689 {
690 error= 1;
691 DBUG_PRINT("error",("Error from remove_const"));
692 DBUG_RETURN(1);
693 }
694 if (old_group_list && !group_list)
695 select_distinct= 0;
696 }
697 if (!group_list && group)
698 {
699 order=0; // The output has only one row
700 simple_order=1;
701 select_distinct= 0; // No need in distinct for 1 row
702 group_optimized_away= 1;
703 }
704
705 calc_group_buffer(this, group_list);
706 send_group_parts= tmp_table_param.group_parts; /* Save org parts */
707
708 if (test_if_subpart(group_list, order) ||
709 (!group_list && tmp_table_param.sum_func_count))
710 {
711 order=0;
712 if (is_indexed_agg_distinct(this, NULL))
713 sort_and_group= 0;
714 }
715
716 /*
717 If the hint FORCE INDEX FOR ORDER BY/GROUP BY is used for the first
718 table (it does not make sense for other tables) then we cannot do join
719 buffering.
720 */
721 if (!plan_is_const())
722 {
723 const TABLE * const first= join_tab[const_tables].table;
724 if ((first->force_index_order && order) ||
725 (first->force_index_group && group_list))
726 no_jbuf_after= 0;
727 }
728
729 select_opts_for_readinfo=
730 (select_options & (SELECT_DESCRIBE | SELECT_NO_JOIN_CACHE)) |
731 (select_lex->ftfunc_list->elements ? SELECT_NO_JOIN_CACHE : 0);
732
733 if (make_join_readinfo(this, select_opts_for_readinfo, no_jbuf_after))
734 DBUG_RETURN(1);
735
736 /*
737 Check if we need to create a temporary table.
738 This has to be done if all tables are not already read (const tables)
739 and one of the following conditions holds:
740 - We are using DISTINCT (simple distinct's are already optimized away)
741 - We are using an ORDER BY or GROUP BY on fields not in the first table
742 - We are using different ORDER BY and GROUP BY orders
743 - The user wants us to buffer the result.
744 When the WITH ROLLUP modifier is present, we cannot skip temporary table
745 creation for the DISTINCT clause just because there are only const tables.
746 */
747 need_tmp= ((!plan_is_const() &&
748 ((select_distinct || !simple_order || !simple_group) ||
749 (group_list && order) ||
750 MY_TEST(select_options & OPTION_BUFFER_RESULT))) ||
751 (rollup.state != ROLLUP::STATE_NONE && select_distinct));
752
753 /* Perform FULLTEXT search before all regular searches */
754 if (!(select_options & SELECT_DESCRIBE) &&
755 !select_lex->materialized_table_count &&
756 select_lex->has_ft_funcs())
757 {
758 if (init_ftfuncs(thd, select_lex, order))
759 DBUG_RETURN(1);
760 optimize_fts_query();
761 }
762
763 /*
764 By setting child_subquery_can_materialize so late we gain the following:
765 JOIN::compare_costs_of_subquery_strategies() can test this variable to
766 know if we have finished evaluating constant conditions, which itself
767 helps determine fanouts.
768 */
769 child_subquery_can_materialize= true;
770
771 /*
772 It's necessary to check const part of HAVING cond as
773 there is a chance that some cond parts may become
774 const items after make_join_statistics() (for example
775 when Item is a reference to a const table field from an
776 outer join).
777 This check is performed only for those conditions
778 which do not use aggregate functions. In such case
779 temporary table may not be used and const condition
780 elements may be lost during further having
781 condition transformation in JOIN::exec.
782 */
783 if (having && const_table_map && !having->with_sum_func)
784 {
785 having->update_used_tables();
786 having= remove_eq_conds(thd, having, &select_lex->having_value);
787 if (select_lex->having_value == Item::COND_FALSE)
788 {
789 having= having_for_explain= new Item_int((longlong) 0,1);
790 zero_result_cause= "Impossible HAVING noticed after reading const tables";
791 error= 0;
792 DBUG_RETURN(0);
793 }
794 }
795
796 /* Cache constant expressions in WHERE, HAVING, ON clauses. */
797 if (!plan_is_const() && cache_const_exprs())
798 DBUG_RETURN(1);
799
800 // See if this subquery can be evaluated with subselect_indexsubquery_engine
801 if (!group_list && !order &&
802 unit->item && unit->item->substype() == Item_subselect::IN_SUBS &&
803 primary_tables == 1 && conds &&
804 !unit->is_union())
805 {
806 bool changed= FALSE;
807 subselect_engine *engine= 0;
808 Item_in_subselect * const in_subs=
809 static_cast<Item_in_subselect *>(unit->item);
810 if (in_subs->exec_method == Item_exists_subselect::EXEC_MATERIALIZATION)
811 {
812 // We cannot have two engines at the same time
813 }
814 else if (!having)
815 {
816 Item *where= conds;
817 if (join_tab[0].type == JT_EQ_REF &&
818 join_tab[0].ref.items[0]->item_name.ptr() == in_left_expr_name)
819 {
820 remove_subq_pushed_predicates(&where);
821 save_index_subquery_explain_info(join_tab, where);
822 join_tab[0].type= JT_UNIQUE_SUBQUERY;
823 error= 0;
824 changed= TRUE;
825 engine= new subselect_indexsubquery_engine(thd, join_tab, unit->item,
826 where, NULL /* having */,
827 false /* check_null */,
828 true /* unique */);
829 }
830 else if (join_tab[0].type == JT_REF &&
831 join_tab[0].ref.items[0]->item_name.ptr() == in_left_expr_name)
832 {
833 remove_subq_pushed_predicates(&where);
834 save_index_subquery_explain_info(join_tab, where);
835 join_tab[0].type= JT_INDEX_SUBQUERY;
836 error= 0;
837 changed= TRUE;
838 engine= new subselect_indexsubquery_engine(thd, join_tab, unit->item,
839 where, NULL, false, false);
840 }
841 } else if (join_tab[0].type == JT_REF_OR_NULL &&
842 join_tab[0].ref.items[0]->item_name.ptr() == in_left_expr_name &&
843 having->item_name.ptr() == in_having_cond)
844 {
845 join_tab[0].type= JT_INDEX_SUBQUERY;
846 error= 0;
847 changed= TRUE;
848 conds= remove_additional_cond(conds);
849 save_index_subquery_explain_info(join_tab, conds);
850 engine= new subselect_indexsubquery_engine(thd, join_tab, unit->item,
851 conds, having, true, false);
852 /**
853 @todo Above we passed unique=false. But for this query:
854 (oe1, oe2) IN (SELECT primary_key, non_key_maybe_null_field FROM tbl)
855 we could use "unique=true" for the first index component and let
856 Item_is_not_null_test(non_key_maybe_null_field) handle the second.
857 */
858 }
859 if (changed)
860 {
861 /*
862 We leave optimize() because the rest of it is only about order/group
863 which those subqueries don't have.
864 @todo: let execution flow down instead, to be future-proof.
865 */
866 DBUG_RETURN(unit->item->change_engine(engine));
867 }
868 }
869 /*
870 Need to tell handlers that to play it safe, it should fetch all
871 columns of the primary key of the tables: this is because MySQL may
872 build row pointers for the rows, and for all columns of the primary key
873 the read set has not necessarily been set by the server code.
874 */
875 if (need_tmp || select_distinct || group_list || order)
876 {
877 for (uint i = const_tables; i < primary_tables; i++)
878 join_tab[i].table->prepare_for_position();
879 }
880 DBUG_EXECUTE("info", TEST_join(this););
881
882 if (!plan_is_const())
883 {
884 JOIN_TAB *tab= &join_tab[const_tables];
885
886 if (order)
887 {
888 /*
889 Force using of tmp table if sorting by a SP or UDF function due to
890 their expensive and probably non-deterministic nature.
891 */
892 for (ORDER *tmp_order= order; tmp_order ; tmp_order=tmp_order->next)
893 {
894 Item *item= *tmp_order->item;
895 if (item->is_expensive())
896 {
897 /* Force tmp table without sort */
898 need_tmp=1; simple_order=simple_group=0;
899 break;
900 }
901 }
902 }
903
904 /*
905 Because filesort always does a full table scan or a quick range scan
906 we must add the removed reference to the select for the table.
907 We only need to do this when we have a simple_order or simple_group
908 as in other cases the join is done before the sort.
909 */
910 if ((order || group_list) &&
911 tab->type != JT_ALL &&
912 tab->type != JT_FT &&
913 tab->type != JT_REF_OR_NULL &&
914 ((order && simple_order) || (group_list && simple_group)))
915 {
916 if (add_ref_to_table_cond(thd,tab)) {
917 DBUG_RETURN(1);
918 }
919 }
920
921 /*
922 Investigate whether we may use an ordered index as part of either
923 DISTINCT, GROUP BY or ORDER BY execution. An ordered index may be
924 used for only the first of any of these terms to be executed. This
925 is reflected in the order which we check for test_if_skip_sort_order()
926 below. However we do not check for DISTINCT here, as it would have
927 been transformed to a GROUP BY at this stage if it is a candidate for
928 ordered index optimization.
929 If a decision was made to use an ordered index, the availability
930 of such an access path is stored in 'ordered_index_usage' for later
931 use by 'execute' or 'explain'
932 */
933 DBUG_ASSERT(ordered_index_usage == ordered_index_void);
934
935 if (group_list) // GROUP BY honoured first
936 // (DISTINCT was rewritten to GROUP BY if skippable)
937 {
938 /*
939 When there is SQL_BIG_RESULT do not sort using index for GROUP BY,
940 and thus force sorting on disk unless a group min-max optimization
941 is going to be used as it is applied now only for one table queries
942 with covering indexes.
943 */
944 if (!(select_options & SELECT_BIG_RESULT) ||
945 (tab->select &&
946 tab->select->quick &&
947 tab->select->quick->get_type() ==
948 QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX))
949 {
950 if (simple_group && // GROUP BY is possibly skippable
951 !select_distinct) // .. if not preceded by a DISTINCT
952 {
953 /*
954 Calculate a possible 'limit' of table rows for 'GROUP BY':
955 A specified 'LIMIT' is relative to the final resultset.
956 'need_tmp' implies that there will be more postprocessing
957 so the specified 'limit' should not be enforced yet.
958 */
959 const ha_rows limit = need_tmp ? HA_POS_ERROR : m_select_limit;
960
961 if (test_if_skip_sort_order(tab, group_list, limit, false,
962 &tab->table->keys_in_use_for_group_by,
963 "GROUP BY"))
964 {
965 ordered_index_usage= ordered_index_group_by;
966 }
967 }
968
969 /*
970 If we are going to use semi-join LooseScan, it will depend
971 on the selected index scan to be used. If index is not used
972 for the GROUP BY, we risk that sorting is put on the LooseScan
973 table. In order to avoid this, force use of temporary table.
974 TODO: Explain the quick_group part of the test below.
975 */
976 if ((ordered_index_usage != ordered_index_group_by) &&
977 (tmp_table_param.quick_group ||
978 (tab->emb_sj_nest &&
979 tab->position->sj_strategy == SJ_OPT_LOOSE_SCAN)))
980 {
981 need_tmp=1;
982 simple_order= simple_group= false; // Force tmp table without sort
983 }
984 }
985 }
986 else if (order && // ORDER BY wo/ preceeding GROUP BY
987 (simple_order || skip_sort_order)) // which is possibly skippable
988 {
989 if (test_if_skip_sort_order(tab, order, m_select_limit, false,
990 &tab->table->keys_in_use_for_order_by,
991 "ORDER BY"))
992 {
993 ordered_index_usage= ordered_index_order_by;
994 }
995 }
996 }
997
998 /**
999 * Push joins to handler(s) whenever possible.
1000 * The handlers will inspect the QEP through the
1001 * AQP (Abstract Query Plan), and extract from it
1002 * whatever it might implement of pushed execution.
1003 * It is the responsibility of the handler to store any
1004 * information it needs for later execution of pushed queries.
1005 *
1006 * Currently pushed joins are only implemented by NDB.
1007 * It only makes sense to try pushing if > 1 non-const tables.
1008 */
1009 if (!plan_is_const() && !plan_is_single_table())
1010 {
1011 const AQP::Join_plan plan(this);
1012 if (ha_make_pushed_joins(thd, &plan))
1013 DBUG_RETURN(1);
1014 }
1015
1016 /**
1017 * Set up access functions for the tables as
1018 * required by the selected access type.
1019 */
1020 for (uint i= const_tables; i < tables; i++)
1021 {
1022 pick_table_access_method (&join_tab[i]);
1023 }
1024
1025 if (make_tmp_tables_info())
1026 DBUG_RETURN(1);
1027
1028 error= 0;
1029 DBUG_RETURN(0);
1030
1031 setup_subq_exit:
1032
1033 DBUG_ASSERT(zero_result_cause != NULL);
1034 /*
1035 Even with zero matching rows, subqueries in the HAVING clause may
1036 need to be evaluated if there are aggregate functions in the
1037 query. If this JOIN is part of an outer query, subqueries in HAVING may
1038 be evaluated several times in total; so subquery materialization makes
1039 sense.
1040 */
1041 child_subquery_can_materialize= true;
1042 trace_steps.end(); // because all steps are done
1043 Opt_trace_object(trace, "empty_result")
1044 .add_alnum("cause", zero_result_cause);
1045
1046 having_for_explain= having;
1047 error= 0;
1048 DBUG_RETURN(0);
1049 }
1050
1051
1052 #ifdef WITH_PARTITION_STORAGE_ENGINE
1053
1054 /**
1055 Prune partitions for all tables of a join (query block).
1056
1057 Requires that tables have been locked.
1058
1059 @param thd Thread pointer
1060
1061 @returns false if success, true if error
1062 */
prune_table_partitions(THD * thd)1063 bool JOIN::prune_table_partitions(THD *thd)
1064 {
1065 DBUG_ASSERT(select_lex->partitioned_table_count);
1066
1067 for (TABLE_LIST *tbl= select_lex->leaf_tables; tbl; tbl= tbl->next_leaf)
1068 {
1069 /*
1070 If tbl->embedding!=NULL that means that this table is in the inner
1071 part of the nested outer join, and we can't do partition pruning
1072 (TODO: check if this limitation can be lifted.
1073 This also excludes semi-joins. Is that intentional?)
1074 This will try to prune non-static conditions, which can
1075 be used after the tables are locked.
1076 */
1077 if (!tbl->embedding)
1078 {
1079 if (prune_partitions(thd, tbl->table,
1080 tbl->join_cond() ? tbl->join_cond() : conds))
1081 return true;
1082 }
1083 }
1084
1085 return false;
1086 }
1087
1088 #endif
1089
1090
1091 /**
1092 Set NESTED_JOIN::nj_counter=0 in all nested joins in passed list.
1093
1094 Recursively set NESTED_JOIN::nj_counter=0 for all nested joins contained in
1095 the passed join_list.
1096
1097 @param join_list List of nested joins to process. It may also contain base
1098 tables which will be ignored.
1099 */
1100
reset_nj_counters(List<TABLE_LIST> * join_list)1101 void reset_nj_counters(List<TABLE_LIST> *join_list)
1102 {
1103 List_iterator<TABLE_LIST> li(*join_list);
1104 TABLE_LIST *table;
1105 DBUG_ENTER("reset_nj_counters");
1106 while ((table= li++))
1107 {
1108 NESTED_JOIN *nested_join;
1109 if ((nested_join= table->nested_join))
1110 {
1111 nested_join->nj_counter= 0;
1112 reset_nj_counters(&nested_join->join_list);
1113 }
1114 }
1115 DBUG_VOID_RETURN;
1116 }
1117
1118
1119 /*****************************************************************************
1120 Make some simple condition optimization:
1121 If there is a test 'field = const' change all refs to 'field' to 'const'
1122 Remove all dummy tests 'item = item', 'const op const'.
1123 Remove all 'item is NULL', when item can never be null!
1124 item->marker should be 0 for all items on entry
1125 Return in cond_value FALSE if condition is impossible (1 = 2)
1126 *****************************************************************************/
1127
1128 class COND_CMP :public ilink<COND_CMP> {
1129 public:
operator new(size_t size)1130 static void *operator new(size_t size)
1131 {
1132 return (void*) sql_alloc((uint) size);
1133 }
operator delete(void * ptr MY_ATTRIBUTE ((unused)),size_t size MY_ATTRIBUTE ((unused)))1134 static void operator delete(void *ptr MY_ATTRIBUTE((unused)),
1135 size_t size MY_ATTRIBUTE((unused)))
1136 { TRASH(ptr, size); }
1137
1138 Item *and_level;
1139 Item_func *cmp_func;
COND_CMP(Item * a,Item_func * b)1140 COND_CMP(Item *a,Item_func *b) :and_level(a),cmp_func(b) {}
1141 };
1142
1143
1144 /**
1145 Find the multiple equality predicate containing a field.
1146
1147 The function retrieves the multiple equalities accessed through
1148 the cond_equal structure from current level and up looking for
1149 an equality containing field. It stops retrieval as soon as the equality
1150 is found and sets inherited_fl to TRUE if it's found on upper levels.
1151
1152 @param cond_equal multiple equalities to search in
1153 @param field field to look for
1154 @param[out] inherited_fl set up to TRUE if multiple equality is found
1155 on upper levels (not on current level of
1156 cond_equal)
1157
1158 @return
1159 - Item_equal for the found multiple equality predicate if a success;
1160 - NULL otherwise.
1161 */
1162
find_item_equal(COND_EQUAL * cond_equal,Field * field,bool * inherited_fl)1163 Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field,
1164 bool *inherited_fl)
1165 {
1166 Item_equal *item= 0;
1167 bool in_upper_level= FALSE;
1168 while (cond_equal)
1169 {
1170 List_iterator_fast<Item_equal> li(cond_equal->current_level);
1171 while ((item= li++))
1172 {
1173 if (item->contains(field))
1174 goto finish;
1175 }
1176 in_upper_level= TRUE;
1177 cond_equal= cond_equal->upper_levels;
1178 }
1179 in_upper_level= FALSE;
1180 finish:
1181 *inherited_fl= in_upper_level;
1182 return item;
1183 }
1184
1185
1186 /**
1187 Get the best field substitution for a given field.
1188
1189 If the field is member of a multiple equality, look up that equality
1190 and return the most appropriate field. Usually this is the equivalenced
1191 field belonging to the outer-most table in the join order, but
1192 @see Item_field::get_subst_item() for details.
1193 Otherwise, return the same field.
1194
1195 @param item_field The field that we are seeking a substitution for.
1196 @param cond_equal multiple equalities to search in
1197
1198 @return The substituted field.
1199 */
1200
get_best_field(Item_field * item_field,COND_EQUAL * cond_equal)1201 Item_field *get_best_field(Item_field *item_field, COND_EQUAL *cond_equal)
1202 {
1203 bool dummy;
1204 Item_equal *item_eq= find_item_equal(cond_equal, item_field->field, &dummy);
1205 if (!item_eq)
1206 return item_field;
1207
1208 return item_eq->get_subst_item(item_field);
1209 }
1210
1211
1212 /**
1213 Check whether an equality can be used to build multiple equalities.
1214
1215 This function first checks whether the equality (left_item=right_item)
1216 is a simple equality i.e. the one that equates a field with another field
1217 or a constant (field=field_item or field=const_item).
1218 If this is the case the function looks for a multiple equality
1219 in the lists referenced directly or indirectly by cond_equal inferring
1220 the given simple equality. If it doesn't find any, it builds a multiple
1221 equality that covers the predicate, i.e. the predicate can be inferred
1222 from this multiple equality.
1223 The built multiple equality could be obtained in such a way:
1224 create a binary multiple equality equivalent to the predicate, then
1225 merge it, if possible, with one of old multiple equalities.
1226 This guarantees that the set of multiple equalities covering equality
1227 predicates will be minimal.
1228
1229 EXAMPLE:
1230 For the where condition
1231 @code
1232 WHERE a=b AND b=c AND
1233 (b=2 OR f=e)
1234 @endcode
1235 the check_equality will be called for the following equality
1236 predicates a=b, b=c, b=2 and f=e.
1237 - For a=b it will be called with *cond_equal=(0,[]) and will transform
1238 *cond_equal into (0,[Item_equal(a,b)]).
1239 - For b=c it will be called with *cond_equal=(0,[Item_equal(a,b)])
1240 and will transform *cond_equal into CE=(0,[Item_equal(a,b,c)]).
1241 - For b=2 it will be called with *cond_equal=(ptr(CE),[])
1242 and will transform *cond_equal into (ptr(CE),[Item_equal(2,a,b,c)]).
1243 - For f=e it will be called with *cond_equal=(ptr(CE), [])
1244 and will transform *cond_equal into (ptr(CE),[Item_equal(f,e)]).
1245
1246 @note
1247 Now only fields that have the same type definitions (verified by
1248 the Field::eq_def method) are placed to the same multiple equalities.
1249 Because of this some equality predicates are not eliminated and
1250 can be used in the constant propagation procedure.
1251 We could weaken the equality test as soon as at least one of the
1252 equal fields is to be equal to a constant. It would require a
1253 more complicated implementation: we would have to store, in
1254 general case, its own constant for each fields from the multiple
1255 equality. But at the same time it would allow us to get rid
1256 of constant propagation completely: it would be done by the call
1257 to build_equal_items_for_cond.
1258
1259
1260 The implementation does not follow exactly the above rules to
1261 build a new multiple equality for the equality predicate.
1262 If it processes the equality of the form field1=field2, it
1263 looks for multiple equalities me1 containing field1 and me2 containing
1264 field2. If only one of them is found the function expands it with
1265 the lacking field. If multiple equalities for both fields are
1266 found they are merged. If both searches fail a new multiple equality
1267 containing just field1 and field2 is added to the existing
1268 multiple equalities.
1269 If the function processes the predicate of the form field1=const,
1270 it looks for a multiple equality containing field1. If found, the
1271 function checks the constant of the multiple equality. If the value
1272 is unknown, it is setup to const. Otherwise the value is compared with
1273 const and the evaluation of the equality predicate is performed.
1274 When expanding/merging equality predicates from the upper levels
1275 the function first copies them for the current level. It looks
1276 acceptable, as this happens rarely. The implementation without
1277 copying would be much more complicated.
1278
1279 @param left_item left term of the equality to be checked
1280 @param right_item right term of the equality to be checked
1281 @param item equality item if the equality originates from a condition
1282 predicate, 0 if the equality is the result of row
1283 elimination
1284 @param cond_equal multiple equalities that must hold together with the
1285 equality
1286
1287 @retval
1288 TRUE if the predicate is a simple equality predicate to be used
1289 for building multiple equalities
1290 @retval
1291 FALSE otherwise
1292 */
1293
check_simple_equality(Item * left_item,Item * right_item,Item * item,COND_EQUAL * cond_equal)1294 static bool check_simple_equality(Item *left_item, Item *right_item,
1295 Item *item, COND_EQUAL *cond_equal)
1296 {
1297 if (left_item->type() == Item::REF_ITEM &&
1298 ((Item_ref*)left_item)->ref_type() == Item_ref::VIEW_REF)
1299 {
1300 if (((Item_ref*)left_item)->depended_from)
1301 return FALSE;
1302 left_item= left_item->real_item();
1303 }
1304 if (right_item->type() == Item::REF_ITEM &&
1305 ((Item_ref*)right_item)->ref_type() == Item_ref::VIEW_REF)
1306 {
1307 if (((Item_ref*)right_item)->depended_from)
1308 return FALSE;
1309 right_item= right_item->real_item();
1310 }
1311 if (left_item->type() == Item::FIELD_ITEM &&
1312 right_item->type() == Item::FIELD_ITEM &&
1313 !((Item_field*)left_item)->depended_from &&
1314 !((Item_field*)right_item)->depended_from)
1315 {
1316 /* The predicate the form field1=field2 is processed */
1317
1318 Field *left_field= ((Item_field*) left_item)->field;
1319 Field *right_field= ((Item_field*) right_item)->field;
1320
1321 if (!left_field->eq_def(right_field))
1322 return FALSE;
1323
1324 /* Search for multiple equalities containing field1 and/or field2 */
1325 bool left_copyfl, right_copyfl;
1326 Item_equal *left_item_equal=
1327 find_item_equal(cond_equal, left_field, &left_copyfl);
1328 Item_equal *right_item_equal=
1329 find_item_equal(cond_equal, right_field, &right_copyfl);
1330
1331 /* As (NULL=NULL) != TRUE we can't just remove the predicate f=f */
1332 if (left_field->eq(right_field)) /* f = f */
1333 return (!(left_field->maybe_null() && !left_item_equal));
1334
1335 if (left_item_equal && left_item_equal == right_item_equal)
1336 {
1337 /*
1338 The equality predicate is inference of one of the existing
1339 multiple equalities, i.e the condition is already covered
1340 by upper level equalities
1341 */
1342 return TRUE;
1343 }
1344
1345 /* Copy the found multiple equalities at the current level if needed */
1346 if (left_copyfl)
1347 {
1348 /* left_item_equal of an upper level contains left_item */
1349 left_item_equal= new Item_equal(left_item_equal);
1350 cond_equal->current_level.push_back(left_item_equal);
1351 }
1352 if (right_copyfl)
1353 {
1354 /* right_item_equal of an upper level contains right_item */
1355 right_item_equal= new Item_equal(right_item_equal);
1356 cond_equal->current_level.push_back(right_item_equal);
1357 }
1358
1359 if (left_item_equal)
1360 {
1361 /* left item was found in the current or one of the upper levels */
1362 if (! right_item_equal)
1363 left_item_equal->add((Item_field *) right_item);
1364 else
1365 {
1366 /* Merge two multiple equalities forming a new one */
1367 left_item_equal->merge(right_item_equal);
1368 /* Remove the merged multiple equality from the list */
1369 List_iterator<Item_equal> li(cond_equal->current_level);
1370 while ((li++) != right_item_equal) ;
1371 li.remove();
1372 }
1373 }
1374 else
1375 {
1376 /* left item was not found neither the current nor in upper levels */
1377 if (right_item_equal)
1378 {
1379 right_item_equal->add((Item_field *) left_item);
1380 }
1381 else
1382 {
1383 /* None of the fields was found in multiple equalities */
1384 Item_equal *item_equal= new Item_equal((Item_field *) left_item,
1385 (Item_field *) right_item);
1386 cond_equal->current_level.push_back(item_equal);
1387 }
1388 }
1389 return TRUE;
1390 }
1391
1392 {
1393 /* The predicate of the form field=const/const=field is processed */
1394 Item *const_item= 0;
1395 Item_field *field_item= 0;
1396 if (left_item->type() == Item::FIELD_ITEM &&
1397 !((Item_field*)left_item)->depended_from &&
1398 right_item->const_item())
1399 {
1400 field_item= (Item_field*) left_item;
1401 const_item= right_item;
1402 }
1403 else if (right_item->type() == Item::FIELD_ITEM &&
1404 !((Item_field*)right_item)->depended_from &&
1405 left_item->const_item())
1406 {
1407 field_item= (Item_field*) right_item;
1408 const_item= left_item;
1409 }
1410
1411 if (const_item &&
1412 field_item->result_type() == const_item->result_type())
1413 {
1414 bool copyfl;
1415
1416 if (field_item->result_type() == STRING_RESULT)
1417 {
1418 const CHARSET_INFO *cs= field_item->field->charset();
1419 if (!item)
1420 {
1421 Item_func_eq *eq_item;
1422 if (!(eq_item= new Item_func_eq(left_item, right_item)) ||
1423 eq_item->set_cmp_func())
1424 return FALSE;
1425 eq_item->quick_fix_field();
1426 item= eq_item;
1427 }
1428 if ((cs != ((Item_func *) item)->compare_collation()) ||
1429 !cs->coll->propagate(cs, 0, 0))
1430 return FALSE;
1431 }
1432
1433 Item_equal *item_equal = find_item_equal(cond_equal,
1434 field_item->field, ©fl);
1435 if (copyfl)
1436 {
1437 item_equal= new Item_equal(item_equal);
1438 cond_equal->current_level.push_back(item_equal);
1439 }
1440 if (item_equal)
1441 {
1442 /*
1443 The flag cond_false will be set to 1 after this, if item_equal
1444 already contains a constant and its value is not equal to
1445 the value of const_item.
1446 */
1447 item_equal->add(const_item, field_item);
1448 }
1449 else
1450 {
1451 item_equal= new Item_equal(const_item, field_item);
1452 cond_equal->current_level.push_back(item_equal);
1453 }
1454 return TRUE;
1455 }
1456 }
1457 return FALSE;
1458 }
1459
1460
1461 /**
1462 Convert row equalities into a conjunction of regular equalities.
1463
1464 The function converts a row equality of the form (E1,...,En)=(E'1,...,E'n)
1465 into a list of equalities E1=E'1,...,En=E'n. For each of these equalities
1466 Ei=E'i the function checks whether it is a simple equality or a row
1467 equality. If it is a simple equality it is used to expand multiple
1468 equalities of cond_equal. If it is a row equality it converted to a
1469 sequence of equalities between row elements. If Ei=E'i is neither a
1470 simple equality nor a row equality the item for this predicate is added
1471 to eq_list.
1472
1473 @param thd thread handle
1474 @param left_row left term of the row equality to be processed
1475 @param right_row right term of the row equality to be processed
1476 @param cond_equal multiple equalities that must hold together with the
1477 predicate
1478 @param eq_list results of conversions of row equalities that are not
1479 simple enough to form multiple equalities
1480
1481 @retval
1482 TRUE if conversion has succeeded (no fatal error)
1483 @retval
1484 FALSE otherwise
1485 */
1486
check_row_equality(THD * thd,Item * left_row,Item_row * right_row,COND_EQUAL * cond_equal,List<Item> * eq_list)1487 static bool check_row_equality(THD *thd, Item *left_row, Item_row *right_row,
1488 COND_EQUAL *cond_equal, List<Item>* eq_list)
1489 {
1490 uint n= left_row->cols();
1491 for (uint i= 0 ; i < n; i++)
1492 {
1493 bool is_converted;
1494 Item *left_item= left_row->element_index(i);
1495 Item *right_item= right_row->element_index(i);
1496 if (left_item->type() == Item::ROW_ITEM &&
1497 right_item->type() == Item::ROW_ITEM)
1498 {
1499 is_converted= check_row_equality(thd,
1500 (Item_row *) left_item,
1501 (Item_row *) right_item,
1502 cond_equal, eq_list);
1503 if (!is_converted)
1504 thd->lex->current_select->cond_count++;
1505 }
1506 else
1507 {
1508 is_converted= check_simple_equality(left_item, right_item, 0, cond_equal);
1509 thd->lex->current_select->cond_count++;
1510 }
1511
1512 if (!is_converted)
1513 {
1514 Item_func_eq *eq_item;
1515 if (!(eq_item= new Item_func_eq(left_item, right_item)) ||
1516 eq_item->set_cmp_func())
1517 return FALSE;
1518 eq_item->quick_fix_field();
1519 eq_list->push_back(eq_item);
1520 }
1521 }
1522 return TRUE;
1523 }
1524
1525
1526 /**
1527 Eliminate row equalities and form multiple equalities predicates.
1528
1529 This function checks whether the item is a simple equality
1530 i.e. the one that equates a field with another field or a constant
1531 (field=field_item or field=constant_item), or, a row equality.
1532 For a simple equality the function looks for a multiple equality
1533 in the lists referenced directly or indirectly by cond_equal inferring
1534 the given simple equality. If it doesn't find any, it builds/expands
1535 multiple equality that covers the predicate.
1536 Row equalities are eliminated by substituting for them conjunctions of
1537 regular equalities, which are treated in the same way as original equality
1538 predicates.
1539
1540 @param thd thread handle
1541 @param item predicate to process
1542 @param cond_equal multiple equalities that must hold together with the
1543 predicate
1544 @param eq_list results of conversions of row equalities that are not
1545 simple enough to form multiple equalities
1546
1547 @retval
1548 TRUE if re-writing rules have been applied
1549 @retval
1550 FALSE otherwise, i.e.
1551 if the predicate is not an equality,
1552 or, if the equality is neither a simple one nor a row equality,
1553 or, if the procedure fails by a fatal error.
1554
1555 @note If the equality was created by IN->EXISTS, it may be removed later by
1556 subquery materialization. So we don't mix this possibly temporary equality
1557 with others; if we let it go into a multiple-equality (Item_equal), then we
1558 could not remove it later. There is however an exception: if the outer
1559 expression is a constant, it is safe to leave the equality even in
1560 materialization; all it can do is preventing NULL/FALSE distinction but if
1561 such distinction mattered the equality would be in a triggered condition so
1562 we would not come to this function. And injecting constants is good because
1563 it makes the materialized table smaller.
1564 */
1565
check_equality(THD * thd,Item * item,COND_EQUAL * cond_equal,List<Item> * eq_list)1566 static bool check_equality(THD *thd, Item *item, COND_EQUAL *cond_equal,
1567 List<Item> *eq_list)
1568 {
1569 if (item->type() == Item::FUNC_ITEM &&
1570 ((Item_func*) item)->functype() == Item_func::EQ_FUNC)
1571 {
1572 Item *left_item= ((Item_func*) item)->arguments()[0];
1573 Item *right_item= ((Item_func*) item)->arguments()[1];
1574
1575 if (item->created_by_in2exists() && !left_item->const_item())
1576 return false; // See note above
1577
1578 if (left_item->type() == Item::ROW_ITEM &&
1579 right_item->type() == Item::ROW_ITEM)
1580 {
1581 thd->lex->current_select->cond_count--;
1582 return check_row_equality(thd,
1583 (Item_row *) left_item,
1584 (Item_row *) right_item,
1585 cond_equal, eq_list);
1586 }
1587 else
1588 return check_simple_equality(left_item, right_item, item, cond_equal);
1589 }
1590
1591 return FALSE;
1592 }
1593
1594
1595 /**
1596 Replace all equality predicates in a condition by multiple equality items.
1597
1598 At each 'and' level the function detects items for equality predicates
1599 and replaced them by a set of multiple equality items of class Item_equal,
1600 taking into account inherited equalities from upper levels.
1601 If an equality predicate is used not in a conjunction it's just
1602 replaced by a multiple equality predicate.
1603 For each 'and' level the function set a pointer to the inherited
1604 multiple equalities in the cond_equal field of the associated
1605 object of the type Item_cond_and.
1606 The function also traverses the cond tree and for each field reference
1607 sets a pointer to the multiple equality item containing the field, if there
1608 is any. If this multiple equality equates fields to a constant the
1609 function replaces the field reference by the constant in the cases
1610 when the field is not of a string type or when the field reference is
1611 just an argument of a comparison predicate.
1612 The function also determines the maximum number of members in
1613 equality lists of each Item_cond_and object assigning it to
1614 thd->lex->current_select->max_equal_elems.
1615
1616 @note
1617 Multiple equality predicate =(f1,..fn) is equivalent to the conjunction of
1618 f1=f2, .., fn-1=fn. It substitutes any inference from these
1619 equality predicates that is equivalent to the conjunction.
1620 Thus, =(a1,a2,a3) can substitute for ((a1=a3) AND (a2=a3) AND (a2=a1)) as
1621 it is equivalent to ((a1=a2) AND (a2=a3)).
1622 The function always makes a substitution of all equality predicates occurring
1623 in a conjunction for a minimal set of multiple equality predicates.
1624 This set can be considered as a canonical representation of the
1625 sub-conjunction of the equality predicates.
1626 E.g. (t1.a=t2.b AND t2.b>5 AND t1.a=t3.c) is replaced by
1627 (=(t1.a,t2.b,t3.c) AND t2.b>5), not by
1628 (=(t1.a,t2.b) AND =(t1.a,t3.c) AND t2.b>5);
1629 while (t1.a=t2.b AND t2.b>5 AND t3.c=t4.d) is replaced by
1630 (=(t1.a,t2.b) AND =(t3.c,t4.d) AND t2.b>5),
1631 but if additionally =(t4.d,t2.b) is inherited, it
1632 will be replaced by (=(t1.a,t2.b,t3.c,t4.d) AND t2.b>5)
1633
1634 The function performs the substitution in a recursive descent of
1635 the condition tree, passing to the next AND level a chain of multiple
1636 equality predicates which have been built at the upper levels.
1637 The Item_equal items built at the level are attached to other
1638 non-equality conjuncts as a sublist. The pointer to the inherited
1639 multiple equalities is saved in the and condition object (Item_cond_and).
1640 This chain allows us, for any field reference occurrence, to easily find a
1641 multiple equality that must be held for this occurrence.
1642 For each AND level we do the following:
1643 - scan it for all equality predicate (=) items
1644 - join them into disjoint Item_equal() groups
1645 - process the included OR conditions recursively to do the same for
1646 lower AND levels.
1647
1648 We need to do things in this order as lower AND levels need to know about
1649 all possible Item_equal objects in upper levels.
1650
1651 @param thd thread handle
1652 @param cond condition(expression) where to make replacement
1653 @param inherited path to all inherited multiple equality items
1654 @param do_inherit whether or not to inherit equalities from other
1655 parts of the condition
1656
1657 @return
1658 pointer to the transformed condition
1659 */
1660
build_equal_items_for_cond(THD * thd,Item * cond,COND_EQUAL * inherited,bool do_inherit)1661 static Item *build_equal_items_for_cond(THD *thd, Item *cond,
1662 COND_EQUAL *inherited,
1663 bool do_inherit)
1664 {
1665 Item_equal *item_equal;
1666 COND_EQUAL cond_equal;
1667 cond_equal.upper_levels= inherited;
1668
1669 if (check_stack_overrun(thd, STACK_MIN_SIZE, NULL))
1670 return cond;
1671
1672 if (cond->type() == Item::COND_ITEM)
1673 {
1674 List<Item> eq_list;
1675 bool and_level= ((Item_cond*) cond)->functype() ==
1676 Item_func::COND_AND_FUNC;
1677 List<Item> *args= ((Item_cond*) cond)->argument_list();
1678
1679 List_iterator<Item> li(*args);
1680 Item *item;
1681
1682 if (and_level)
1683 {
1684 /*
1685 Retrieve all conjuncts of this level detecting the equality
1686 that are subject to substitution by multiple equality items and
1687 removing each such predicate from the conjunction after having
1688 found/created a multiple equality whose inference the predicate is.
1689 */
1690 while ((item= li++))
1691 {
1692 /*
1693 PS/SP note: we can safely remove a node from AND-OR
1694 structure here because it's restored before each
1695 re-execution of any prepared statement/stored procedure.
1696 */
1697 if (check_equality(thd, item, &cond_equal, &eq_list))
1698 li.remove();
1699 }
1700
1701 /*
1702 Check if we eliminated all the predicates of the level, e.g.
1703 (a=a AND b=b AND a=a).
1704 */
1705 if (!args->elements &&
1706 !cond_equal.current_level.elements &&
1707 !eq_list.elements)
1708 return new Item_int((longlong) 1, 1);
1709
1710 List_iterator_fast<Item_equal> it(cond_equal.current_level);
1711 while ((item_equal= it++))
1712 {
1713 item_equal->fix_length_and_dec();
1714 item_equal->update_used_tables();
1715 set_if_bigger(thd->lex->current_select->max_equal_elems,
1716 item_equal->members());
1717 }
1718
1719 ((Item_cond_and*)cond)->cond_equal= cond_equal;
1720 inherited= &(((Item_cond_and*)cond)->cond_equal);
1721 }
1722 /*
1723 Make replacement of equality predicates for lower levels
1724 of the condition expression.
1725 */
1726 li.rewind();
1727 while ((item= li++))
1728 {
1729 Item *new_item=
1730 build_equal_items_for_cond(thd, item, inherited, do_inherit);
1731 if (new_item != item)
1732 {
1733 /* This replacement happens only for standalone equalities */
1734 /*
1735 This is ok with PS/SP as the replacement is done for
1736 arguments of an AND/OR item, which are restored for each
1737 execution of PS/SP.
1738 */
1739 li.replace(new_item);
1740 }
1741 }
1742 if (and_level)
1743 {
1744 args->concat(&eq_list);
1745 args->concat((List<Item> *)&cond_equal.current_level);
1746 }
1747 }
1748 else if (cond->type() == Item::FUNC_ITEM)
1749 {
1750 List<Item> eq_list;
1751 /*
1752 If an equality predicate forms the whole and level,
1753 we call it standalone equality and it's processed here.
1754 E.g. in the following where condition
1755 WHERE a=5 AND (b=5 or a=c)
1756 (b=5) and (a=c) are standalone equalities.
1757 In general we can't leave alone standalone eqalities:
1758 for WHERE a=b AND c=d AND (b=c OR d=5)
1759 b=c is replaced by =(a,b,c,d).
1760 */
1761 if (check_equality(thd, cond, &cond_equal, &eq_list))
1762 {
1763 int n= cond_equal.current_level.elements + eq_list.elements;
1764 if (n == 0)
1765 return new Item_int((longlong) 1,1);
1766 else if (n == 1)
1767 {
1768 if ((item_equal= cond_equal.current_level.pop()))
1769 {
1770 item_equal->fix_length_and_dec();
1771 item_equal->update_used_tables();
1772 set_if_bigger(thd->lex->current_select->max_equal_elems,
1773 item_equal->members());
1774 return item_equal;
1775 }
1776
1777 return eq_list.pop();
1778 }
1779 else
1780 {
1781 /*
1782 Here a new AND level must be created. It can happen only
1783 when a row equality is processed as a standalone predicate.
1784 */
1785 Item_cond_and *and_cond= new Item_cond_and(eq_list);
1786 and_cond->quick_fix_field();
1787 List<Item> *args= and_cond->argument_list();
1788 List_iterator_fast<Item_equal> it(cond_equal.current_level);
1789 while ((item_equal= it++))
1790 {
1791 item_equal->fix_length_and_dec();
1792 item_equal->update_used_tables();
1793 set_if_bigger(thd->lex->current_select->max_equal_elems,
1794 item_equal->members());
1795 }
1796 and_cond->cond_equal= cond_equal;
1797 args->concat((List<Item> *)&cond_equal.current_level);
1798
1799 return and_cond;
1800 }
1801 }
1802
1803 if (do_inherit)
1804 {
1805 /*
1806 For each field reference in cond, not from equal item predicates,
1807 set a pointer to the multiple equality it belongs to (if there is any)
1808 as soon the field is not of a string type or the field reference is
1809 an argument of a comparison predicate.
1810 */
1811 uchar *is_subst_valid= (uchar *) 1;
1812 cond= cond->compile(&Item::subst_argument_checker,
1813 &is_subst_valid,
1814 &Item::equal_fields_propagator,
1815 (uchar *) inherited);
1816 }
1817 cond->update_used_tables();
1818 }
1819 return cond;
1820 }
1821
1822
1823 /**
1824 Build multiple equalities for a condition and all on expressions that
1825 inherit these multiple equalities.
1826
1827 The function first applies the build_equal_items_for_cond function
1828 to build all multiple equalities for condition cond utilizing equalities
1829 referred through the parameter inherited. The extended set of
1830 equalities is returned in the structure referred by the cond_equal_ref
1831 parameter. After this the function calls itself recursively for
1832 all on expressions whose direct references can be found in join_list
1833 and who inherit directly the multiple equalities just having built.
1834
1835 @note
1836 The on expression used in an outer join operation inherits all equalities
1837 from the on expression of the embedding join, if there is any, or
1838 otherwise - from the where condition.
1839 This fact is not obvious, but presumably can be proved.
1840 Consider the following query:
1841 @code
1842 SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t1.a=t3.a AND t2.a=t4.a
1843 WHERE t1.a=t2.a;
1844 @endcode
1845 If the on expression in the query inherits =(t1.a,t2.a), then we
1846 can build the multiple equality =(t1.a,t2.a,t3.a,t4.a) that infers
1847 the equality t3.a=t4.a. Although the on expression
1848 t1.a=t3.a AND t2.a=t4.a AND t3.a=t4.a is not equivalent to the one
1849 in the query the latter can be replaced by the former: the new query
1850 will return the same result set as the original one.
1851
1852 Interesting that multiple equality =(t1.a,t2.a,t3.a,t4.a) allows us
1853 to use t1.a=t3.a AND t3.a=t4.a under the on condition:
1854 @code
1855 SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t1.a=t3.a AND t3.a=t4.a
1856 WHERE t1.a=t2.a
1857 @endcode
1858 This query is equivalent to:
1859 @code
1860 SELECT * FROM (t1 LEFT JOIN (t3,t4) ON t1.a=t3.a AND t3.a=t4.a),t2
1861 WHERE t1.a=t2.a
1862 @endcode
1863 Similarly the original query can be rewritten to the query:
1864 @code
1865 SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t2.a=t4.a AND t3.a=t4.a
1866 WHERE t1.a=t2.a
1867 @endcode
1868 that is equivalent to:
1869 @code
1870 SELECT * FROM (t2 LEFT JOIN (t3,t4)ON t2.a=t4.a AND t3.a=t4.a), t1
1871 WHERE t1.a=t2.a
1872 @endcode
1873 Thus, applying equalities from the where condition we basically
1874 can get more freedom in performing join operations.
1875 Although we don't use this property now, it probably makes sense to use
1876 it in the future.
1877 @param thd Thread handler
1878 @param cond condition to build the multiple equalities for
1879 @param inherited path to all inherited multiple equality items
1880 @param do_inherit whether or not to inherit equalities from other
1881 parts of the condition
1882 @param join_list list of join tables to which the condition
1883 refers to
1884 @param[out] cond_equal_ref pointer to the structure to place built
1885 equalities in
1886
1887 @return
1888 pointer to the transformed condition containing multiple equalities
1889 */
1890
build_equal_items(THD * thd,Item * cond,COND_EQUAL * inherited,bool do_inherit,List<TABLE_LIST> * join_list,COND_EQUAL ** cond_equal_ref)1891 Item *build_equal_items(THD *thd, Item *cond, COND_EQUAL *inherited,
1892 bool do_inherit, List<TABLE_LIST> *join_list,
1893 COND_EQUAL **cond_equal_ref)
1894 {
1895 COND_EQUAL *cond_equal= 0;
1896
1897 if (cond)
1898 {
1899 cond= build_equal_items_for_cond(thd, cond, inherited, do_inherit);
1900 cond->update_used_tables();
1901 if (cond->type() == Item::COND_ITEM &&
1902 ((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
1903 cond_equal= &((Item_cond_and*) cond)->cond_equal;
1904 else if (cond->type() == Item::FUNC_ITEM &&
1905 ((Item_cond*) cond)->functype() == Item_func::MULT_EQUAL_FUNC)
1906 {
1907 cond_equal= new COND_EQUAL;
1908 cond_equal->current_level.push_back((Item_equal *) cond);
1909 }
1910 }
1911 if (cond_equal)
1912 {
1913 cond_equal->upper_levels= inherited;
1914 inherited= cond_equal;
1915 }
1916 *cond_equal_ref= cond_equal;
1917
1918 if (join_list)
1919 {
1920 TABLE_LIST *table;
1921 List_iterator<TABLE_LIST> li(*join_list);
1922
1923 while ((table= li++))
1924 {
1925 if (table->join_cond())
1926 {
1927 List<TABLE_LIST> *nested_join_list= table->nested_join ?
1928 &table->nested_join->join_list : NULL;
1929 /*
1930 We can modify table->join_cond() because its old value will
1931 be restored before re-execution of PS/SP.
1932 */
1933 table->set_join_cond(build_equal_items(thd, table->join_cond(),
1934 inherited, do_inherit,
1935 nested_join_list,
1936 &table->cond_equal));
1937 }
1938 }
1939 }
1940
1941 return cond;
1942 }
1943
1944
1945 /**
1946 Compare field items by table order in the execution plan.
1947
1948 field1 considered as better than field2 if the table containing
1949 field1 is accessed earlier than the table containing field2.
1950 The function finds out which of the two fields is better according to
1951 this criterion.
1952
1953 @param field1 first field item to compare
1954 @param field2 second field item to compare
1955 @param table_join_idx index to tables determining table order
1956
1957 @retval
1958 -1 if field1 is better than field2
1959 @retval
1960 1 if field2 is better than field1
1961 @retval
1962 0 otherwise
1963 */
1964
compare_fields_by_table_order(Item_field * field1,Item_field * field2,void * table_join_idx)1965 static int compare_fields_by_table_order(Item_field *field1,
1966 Item_field *field2,
1967 void *table_join_idx)
1968 {
1969 int cmp= 0;
1970 bool outer_ref= 0;
1971 if (field1->used_tables() & OUTER_REF_TABLE_BIT)
1972 {
1973 outer_ref= 1;
1974 cmp= -1;
1975 }
1976 if (field2->used_tables() & OUTER_REF_TABLE_BIT)
1977 {
1978 outer_ref= 1;
1979 cmp++;
1980 }
1981 if (outer_ref)
1982 return cmp;
1983 JOIN_TAB **idx= (JOIN_TAB **) table_join_idx;
1984
1985 /*
1986 idx is NULL if this function was not called from JOIN::optimize()
1987 but from e.g. mysql_delete() or mysql_update(). In these cases
1988 there is only one table and both fields belong to it. Example
1989 condition where this is the case: t1.fld1=t1.fld2
1990 */
1991 if (!idx)
1992 return 0;
1993
1994 cmp= idx[field1->field->table->tablenr]-idx[field2->field->table->tablenr];
1995 return cmp < 0 ? -1 : (cmp ? 1 : 0);
1996 }
1997
1998
1999 /**
2000 Generate minimal set of simple equalities equivalent to a multiple equality.
2001
2002 The function retrieves the fields of the multiple equality item
2003 item_equal and for each field f:
2004 - if item_equal contains const it generates the equality f=const_item;
2005 - otherwise, if f is not the first field, generates the equality
2006 f=item_equal->get_first().
2007 All generated equality are added to the cond conjunction.
2008
2009 @param cond condition to add the generated equality to
2010 @param upper_levels structure to access multiple equality of upper levels
2011 @param item_equal multiple equality to generate simple equality from
2012
2013 @note
2014 Before generating an equality function checks that it has not
2015 been generated for multiple equalities of the upper levels.
2016 E.g. for the following where condition
2017 WHERE a=5 AND ((a=b AND b=c) OR c>4)
2018 the upper level AND condition will contain =(5,a),
2019 while the lower level AND condition will contain =(5,a,b,c).
2020 When splitting =(5,a,b,c) into a separate equality predicates
2021 we should omit 5=a, as we have it already in the upper level.
2022 The following where condition gives us a more complicated case:
2023 WHERE t1.a=t2.b AND t3.c=t4.d AND (t2.b=t3.c OR t4.e>5 ...) AND ...
2024 Given the tables are accessed in the order t1->t2->t3->t4 for
2025 the selected query execution plan the lower level multiple
2026 equality =(t1.a,t2.b,t3.c,t4.d) formally should be converted to
2027 t1.a=t2.b AND t1.a=t3.c AND t1.a=t4.d. But t1.a=t2.b will be
2028 generated for the upper level. Also t3.c=t4.d will be generated there.
2029 So only t1.a=t3.c should be left in the lower level.
2030 If cond is equal to 0, then no more than one equality is generated
2031 and a pointer to it is returned as the result of the function.
2032
2033 @return
2034 - The condition with generated simple equalities or
2035 a pointer to the simple generated equality, if success.
2036 - 0, otherwise.
2037 */
2038
eliminate_item_equal(Item * cond,COND_EQUAL * upper_levels,Item_equal * item_equal)2039 static Item *eliminate_item_equal(Item *cond, COND_EQUAL *upper_levels,
2040 Item_equal *item_equal)
2041 {
2042 List<Item> eq_list;
2043 Item_func_eq *eq_item= NULL;
2044 if (((Item *) item_equal)->const_item() && !item_equal->val_int())
2045 return new Item_int((longlong) 0,1);
2046 Item *const item_const= item_equal->get_const();
2047 Item_equal_iterator it(*item_equal);
2048 if (!item_const)
2049 {
2050 /*
2051 If there is a const item, match all field items with the const item,
2052 otherwise match the second and subsequent field items with the first one:
2053 */
2054 it++;
2055 }
2056 Item_field *item_field; // Field to generate equality for.
2057 while ((item_field= it++))
2058 {
2059 /*
2060 Generate an equality of the form:
2061 item_field = some previous field in item_equal's list.
2062
2063 First see if we really need to generate it:
2064 */
2065 Item_equal *const upper= item_field->find_item_equal(upper_levels);
2066 if (upper) // item_field is in this upper equality
2067 {
2068 if (item_const && upper->get_const())
2069 continue; // Const at both levels, no need to generate at current level
2070 /*
2071 If the upper-level multiple equality contains this item, there is no
2072 need to generate the equality, unless item_field belongs to a
2073 semi-join nest that is used for Materialization, and refers to tables
2074 that are outside of the materialized semi-join nest,
2075 As noted in Item_equal::get_subst_item(), subquery materialization
2076 does not have this problem.
2077 */
2078 JOIN_TAB *const tab= item_field->field->table->reginfo.join_tab;
2079
2080 if (!(tab && sj_is_materialize_strategy(tab->get_sj_strategy())))
2081 {
2082 Item_field *item_match;
2083 Item_equal_iterator li(*item_equal);
2084 while ((item_match= li++) != item_field)
2085 {
2086 if (item_match->find_item_equal(upper_levels) == upper)
2087 break; // (item_match, item_field) is also in upper level equality
2088 }
2089 if (item_match != item_field)
2090 continue;
2091 }
2092 } // ... if (upper).
2093
2094 /*
2095 item_field should be compared with the head of the multiple equality
2096 list.
2097 item_field may refer to a table that is within a semijoin materialization
2098 nest. In that case, the order of the join_tab entries may look like:
2099
2100 ot1 ot2 <subquery> ot5 SJM(it3 it4)
2101
2102 If we have a multiple equality
2103
2104 (ot1.c1, ot2.c2, <subquery>.c it3.c3, it4.c4, ot5.c5),
2105
2106 we should generate the following equalities:
2107 1. ot1.c1 = ot2.c2
2108 2. ot1.c1 = <subquery>.c
2109 3. it3.c3 = it4.c4
2110 4. ot1.c1 = ot5.c5
2111
2112 Equalities 1) and 4) are regular equalities between two outer tables.
2113 Equality 2) is an equality that matches the outer query with a
2114 materialized temporary table. It is either performed as a lookup
2115 into the materialized table (SJM-lookup), or as a condition on the
2116 outer table (SJM-scan).
2117 Equality 3) is evaluated during semijoin materialization.
2118
2119 If there is a const item, match against this one.
2120 Otherwise, match against the first field item in the multiple equality,
2121 unless the item is within a materialized semijoin nest, in case it will
2122 be matched against the first item within the SJM nest.
2123 @see JOIN::set_access_methods()
2124 @see JOIN::set_prefix_tables()
2125 @see Item_equal::get_subst_item()
2126 */
2127
2128 Item *const head=
2129 item_const ? item_const : item_equal->get_subst_item(item_field);
2130 if (head == item_field)
2131 continue;
2132
2133 // we have a pair, can generate 'item_field=head'
2134 if (eq_item)
2135 eq_list.push_back(eq_item);
2136
2137 eq_item= new Item_func_eq(item_field, head);
2138 if (!eq_item || eq_item->set_cmp_func())
2139 return NULL;
2140 eq_item->quick_fix_field();
2141 } // ... while ((item_field= it++))
2142
2143 if (!cond && !eq_list.head())
2144 {
2145 if (!eq_item)
2146 return new Item_int((longlong) 1,1);
2147 return eq_item;
2148 }
2149
2150 if (eq_item)
2151 eq_list.push_back(eq_item);
2152 if (!cond)
2153 cond= new Item_cond_and(eq_list);
2154 else
2155 {
2156 DBUG_ASSERT(cond->type() == Item::COND_ITEM);
2157 if (eq_list.elements)
2158 ((Item_cond *) cond)->add_at_head(&eq_list);
2159 }
2160
2161 cond->quick_fix_field();
2162 cond->update_used_tables();
2163
2164 return cond;
2165 }
2166
2167
2168 /**
2169 Substitute every field reference in a condition by the best equal field
2170 and eliminate all multiple equality predicates.
2171
2172 The function retrieves the cond condition and for each encountered
2173 multiple equality predicate it sorts the field references in it
2174 according to the order of tables specified by the table_join_idx
2175 parameter. Then it eliminates the multiple equality predicate it
2176 replacing it by the conjunction of simple equality predicates
2177 equating every field from the multiple equality to the first
2178 field in it, or to the constant, if there is any.
2179 After this the function retrieves all other conjuncted
2180 predicates substitute every field reference by the field reference
2181 to the first equal field or equal constant if there are any.
2182
2183 @param cond condition to process
2184 @param cond_equal multiple equalities to take into consideration
2185 @param table_join_idx index to tables determining field preference
2186
2187 @note
2188 At the first glance full sort of fields in multiple equality
2189 seems to be an overkill. Yet it's not the case due to possible
2190 new fields in multiple equality item of lower levels. We want
2191 the order in them to comply with the order of upper levels.
2192
2193 @return
2194 The transformed condition, or NULL in case of error
2195 */
2196
substitute_for_best_equal_field(Item * cond,COND_EQUAL * cond_equal,void * table_join_idx)2197 Item* substitute_for_best_equal_field(Item *cond,
2198 COND_EQUAL *cond_equal,
2199 void *table_join_idx)
2200 {
2201 Item_equal *item_equal;
2202
2203 if (cond->type() == Item::COND_ITEM)
2204 {
2205 List<Item> *cond_list= ((Item_cond*) cond)->argument_list();
2206
2207 bool and_level= ((Item_cond*) cond)->functype() ==
2208 Item_func::COND_AND_FUNC;
2209 if (and_level)
2210 {
2211 cond_equal= &((Item_cond_and *) cond)->cond_equal;
2212 cond_list->disjoin((List<Item> *) &cond_equal->current_level);
2213
2214 List_iterator_fast<Item_equal> it(cond_equal->current_level);
2215 while ((item_equal= it++))
2216 {
2217 item_equal->sort(&compare_fields_by_table_order, table_join_idx);
2218 }
2219 }
2220
2221 List_iterator<Item> li(*cond_list);
2222 Item *item;
2223 while ((item= li++))
2224 {
2225 Item *new_item =substitute_for_best_equal_field(item, cond_equal,
2226 table_join_idx);
2227 /*
2228 This works OK with PS/SP re-execution as changes are made to
2229 the arguments of AND/OR items only
2230 */
2231 if (new_item != item)
2232 li.replace(new_item);
2233 }
2234
2235 if (and_level)
2236 {
2237 List_iterator_fast<Item_equal> it(cond_equal->current_level);
2238 while ((item_equal= it++))
2239 {
2240 cond= eliminate_item_equal(cond, cond_equal->upper_levels, item_equal);
2241 if (cond == NULL)
2242 return NULL;
2243 // This occurs when eliminate_item_equal() founds that cond is
2244 // always false and substitutes it with Item_int 0.
2245 // Due to this, value of item_equal will be 0, so just return it.
2246 if (cond->type() != Item::COND_ITEM)
2247 break;
2248 }
2249 }
2250 if (cond->type() == Item::COND_ITEM &&
2251 !((Item_cond*)cond)->argument_list()->elements)
2252 cond= new Item_int((int32)cond->val_bool());
2253
2254 }
2255 else if (cond->type() == Item::FUNC_ITEM &&
2256 ((Item_cond*) cond)->functype() == Item_func::MULT_EQUAL_FUNC)
2257 {
2258 item_equal= (Item_equal *) cond;
2259 item_equal->sort(&compare_fields_by_table_order, table_join_idx);
2260 if (cond_equal && cond_equal->current_level.head() == item_equal)
2261 cond_equal= cond_equal->upper_levels;
2262 return eliminate_item_equal(0, cond_equal, item_equal);
2263 }
2264 else
2265 cond->transform(&Item::replace_equal_field, 0);
2266 return cond;
2267 }
2268
2269
2270 /*
2271 change field = field to field = const for each found field = const in the
2272 and_level
2273 */
2274
2275 static void
change_cond_ref_to_const(THD * thd,I_List<COND_CMP> * save_list,Item * and_father,Item * cond,Item * field,Item * value)2276 change_cond_ref_to_const(THD *thd, I_List<COND_CMP> *save_list,
2277 Item *and_father, Item *cond,
2278 Item *field, Item *value)
2279 {
2280 if (cond->type() == Item::COND_ITEM)
2281 {
2282 bool and_level= ((Item_cond*) cond)->functype() ==
2283 Item_func::COND_AND_FUNC;
2284 List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
2285 Item *item;
2286 while ((item=li++))
2287 change_cond_ref_to_const(thd, save_list,and_level ? cond : item, item,
2288 field, value);
2289 return;
2290 }
2291 if (cond->eq_cmp_result() == Item::COND_OK)
2292 return; // Not a boolean function
2293
2294 Item_bool_func2 *func= (Item_bool_func2*) cond;
2295 Item **args= func->arguments();
2296 Item *left_item= args[0];
2297 Item *right_item= args[1];
2298 Item_func::Functype functype= func->functype();
2299
2300 if (right_item->eq(field,0) && left_item != value &&
2301 right_item->cmp_context == field->cmp_context &&
2302 (left_item->result_type() != STRING_RESULT ||
2303 value->result_type() != STRING_RESULT ||
2304 left_item->collation.collation == value->collation.collation))
2305 {
2306 Item *tmp=value->clone_item();
2307 if (tmp)
2308 {
2309 tmp->collation.set(right_item->collation);
2310 thd->change_item_tree(args + 1, tmp);
2311 func->update_used_tables();
2312 if ((functype == Item_func::EQ_FUNC || functype == Item_func::EQUAL_FUNC)
2313 && and_father != cond && !left_item->const_item())
2314 {
2315 cond->marker=1;
2316 COND_CMP *tmp2;
2317 if ((tmp2=new COND_CMP(and_father,func)))
2318 save_list->push_back(tmp2);
2319 }
2320 func->set_cmp_func();
2321 }
2322 }
2323 else if (left_item->eq(field,0) && right_item != value &&
2324 left_item->cmp_context == field->cmp_context &&
2325 (right_item->result_type() != STRING_RESULT ||
2326 value->result_type() != STRING_RESULT ||
2327 right_item->collation.collation == value->collation.collation))
2328 {
2329 Item *tmp= value->clone_item();
2330 if (tmp)
2331 {
2332 tmp->collation.set(left_item->collation);
2333 thd->change_item_tree(args, tmp);
2334 value= tmp;
2335 func->update_used_tables();
2336 if ((functype == Item_func::EQ_FUNC || functype == Item_func::EQUAL_FUNC)
2337 && and_father != cond && !right_item->const_item())
2338 {
2339 args[0]= args[1]; // For easy check
2340 thd->change_item_tree(args + 1, value);
2341 cond->marker=1;
2342 COND_CMP *tmp2;
2343 if ((tmp2=new COND_CMP(and_father,func)))
2344 save_list->push_back(tmp2);
2345 }
2346 func->set_cmp_func();
2347 }
2348 }
2349 }
2350
2351 static void
propagate_cond_constants(THD * thd,I_List<COND_CMP> * save_list,Item * and_father,Item * cond)2352 propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list,
2353 Item *and_father, Item *cond)
2354 {
2355 if (cond->type() == Item::COND_ITEM)
2356 {
2357 bool and_level= ((Item_cond*) cond)->functype() ==
2358 Item_func::COND_AND_FUNC;
2359 List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
2360 Item *item;
2361 I_List<COND_CMP> save;
2362 while ((item=li++))
2363 {
2364 propagate_cond_constants(thd, &save,and_level ? cond : item, item);
2365 }
2366 if (and_level)
2367 { // Handle other found items
2368 I_List_iterator<COND_CMP> cond_itr(save);
2369 COND_CMP *cond_cmp;
2370 while ((cond_cmp=cond_itr++))
2371 {
2372 Item **args= cond_cmp->cmp_func->arguments();
2373 if (!args[0]->const_item())
2374 change_cond_ref_to_const(thd, &save,cond_cmp->and_level,
2375 cond_cmp->and_level, args[0], args[1]);
2376 }
2377 }
2378 }
2379 else if (and_father != cond && !cond->marker) // In a AND group
2380 {
2381 if (cond->type() == Item::FUNC_ITEM &&
2382 (((Item_func*) cond)->functype() == Item_func::EQ_FUNC ||
2383 ((Item_func*) cond)->functype() == Item_func::EQUAL_FUNC))
2384 {
2385 Item_func_eq *func=(Item_func_eq*) cond;
2386 Item **args= func->arguments();
2387 bool left_const= args[0]->const_item();
2388 bool right_const= args[1]->const_item();
2389 if (!(left_const && right_const) &&
2390 args[0]->result_type() == args[1]->result_type())
2391 {
2392 if (right_const)
2393 {
2394 resolve_const_item(thd, &args[1], args[0]);
2395 func->update_used_tables();
2396 change_cond_ref_to_const(thd, save_list, and_father, and_father,
2397 args[0], args[1]);
2398 }
2399 else if (left_const)
2400 {
2401 resolve_const_item(thd, &args[0], args[1]);
2402 func->update_used_tables();
2403 change_cond_ref_to_const(thd, save_list, and_father, and_father,
2404 args[1], args[0]);
2405 }
2406 }
2407 }
2408 }
2409 }
2410
2411
2412 /**
2413 Simplify joins replacing outer joins by inner joins whenever it's
2414 possible.
2415
2416 The function, during a retrieval of join_list, eliminates those
2417 outer joins that can be converted into inner join, possibly nested.
2418 It also moves the join conditions for the converted outer joins
2419 and from inner joins to conds.
2420 The function also calculates some attributes for nested joins:
2421 - used_tables
2422 - not_null_tables
2423 - dep_tables.
2424 - on_expr_dep_tables
2425 The first two attributes are used to test whether an outer join can
2426 be substituted for an inner join. The third attribute represents the
2427 relation 'to be dependent on' for tables. If table t2 is dependent
2428 on table t1, then in any evaluated execution plan table access to
2429 table t1 must precede access to table t2. This relation is used also
2430 to check whether the query contains invalid cross-references.
2431 The fourth attribute is an auxiliary one and is used to calculate
2432 dep_tables.
2433 As the attribute dep_tables qualifies possible orders of tables in the
2434 execution plan, the dependencies required by the straight join
2435 modifiers are reflected in this attribute as well.
2436 The function also removes all braces that can be removed from the join
2437 expression without changing its meaning.
2438
2439 @note
2440 An outer join can be replaced by an inner join if the where condition
2441 or the join condition for an embedding nested join contains a conjunctive
2442 predicate rejecting null values for some attribute of the inner tables.
2443
2444 E.g. in the query:
2445 @code
2446 SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a WHERE t2.b < 5
2447 @endcode
2448 the predicate t2.b < 5 rejects nulls.
2449 The query is converted first to:
2450 @code
2451 SELECT * FROM t1 INNER JOIN t2 ON t2.a=t1.a WHERE t2.b < 5
2452 @endcode
2453 then to the equivalent form:
2454 @code
2455 SELECT * FROM t1, t2 WHERE t2.b < 5 AND t2.a=t1.a
2456 @endcode
2457
2458
2459 Similarly the following query:
2460 @code
2461 SELECT * from t1 LEFT JOIN (t2, t3) ON t2.a=t1.a AND t3.b=t1.b
2462 WHERE t2.c < 5
2463 @endcode
2464 is converted to:
2465 @code
2466 SELECT * FROM t1, (t2, t3) WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b
2467
2468 @endcode
2469
2470 One conversion might trigger another:
2471 @code
2472 SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a
2473 LEFT JOIN t3 ON t3.b=t2.b
2474 WHERE t3 IS NOT NULL =>
2475 SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a, t3
2476 WHERE t3 IS NOT NULL AND t3.b=t2.b =>
2477 SELECT * FROM t1, t2, t3
2478 WHERE t3 IS NOT NULL AND t3.b=t2.b AND t2.a=t1.a
2479 @endcode
2480
2481 The function removes all unnecessary braces from the expression
2482 produced by the conversions.
2483 E.g.
2484 @code
2485 SELECT * FROM t1, (t2, t3) WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b
2486 @endcode
2487 finally is converted to:
2488 @code
2489 SELECT * FROM t1, t2, t3 WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b
2490
2491 @endcode
2492
2493
2494 It also will remove braces from the following queries:
2495 @code
2496 SELECT * from (t1 LEFT JOIN t2 ON t2.a=t1.a) LEFT JOIN t3 ON t3.b=t2.b
2497 SELECT * from (t1, (t2,t3)) WHERE t1.a=t2.a AND t2.b=t3.b.
2498 @endcode
2499
2500 The benefit of this simplification procedure is that it might return
2501 a query for which the optimizer can evaluate execution plan with more
2502 join orders. With a left join operation the optimizer does not
2503 consider any plan where one of the inner tables is before some of outer
2504 tables.
2505
2506 IMPLEMENTATION
2507 The function is implemented by a recursive procedure. On the recursive
2508 ascent all attributes are calculated, all outer joins that can be
2509 converted are replaced and then all unnecessary braces are removed.
2510 As join list contains join tables in the reverse order sequential
2511 elimination of outer joins does not require extra recursive calls.
2512
2513 SEMI-JOIN NOTES
2514 Remove all semi-joins that are within another semi-join (i.e. have
2515 an "ancestor" semi-join nest)
2516
2517 EXAMPLES
2518 Here is an example of a join query with invalid cross references:
2519 @code
2520 SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t3.a LEFT JOIN t3 ON t3.b=t1.b
2521 @endcode
2522
2523 @param join reference to the query info
2524 @param join_list list representation of the join to be converted
2525 @param conds condition that join condition for converted outer joins
2526 is added to
2527 @param top true <=> conds is the where condition
2528 @param in_sj TRUE <=> processing semi-join nest's children
2529 @param[out] new_conds New condition
2530 @param changelog Don't specify this parameter, it is reserved for
2531 recursive calls inside this function
2532
2533 @returns true for error, false for success
2534 */
2535
2536 static bool
simplify_joins(JOIN * join,List<TABLE_LIST> * join_list,Item * conds,bool top,bool in_sj,Item ** new_conds,uint * changelog)2537 simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, Item *conds, bool top,
2538 bool in_sj, Item **new_conds, uint *changelog)
2539 {
2540
2541 /*
2542 Each type of change done by this function, or its recursive calls, is
2543 tracked in a bitmap:
2544 */
2545 enum change
2546 {
2547 NONE= 0,
2548 OUTER_JOIN_TO_INNER= 1 << 0,
2549 JOIN_COND_TO_WHERE= 1 << 1,
2550 PAREN_REMOVAL= 1 << 2,
2551 SEMIJOIN= 1 << 3
2552 };
2553 uint changes= 0; // To keep track of changes.
2554 if (changelog == NULL) // This is the top call.
2555 changelog= &changes;
2556
2557 TABLE_LIST *table;
2558 NESTED_JOIN *nested_join;
2559 TABLE_LIST *prev_table= 0;
2560 List_iterator<TABLE_LIST> li(*join_list);
2561 bool straight_join= MY_TEST(join->select_options & SELECT_STRAIGHT_JOIN);
2562 DBUG_ENTER("simplify_joins");
2563
2564 /*
2565 Try to simplify join operations from join_list.
2566 The most outer join operation is checked for conversion first.
2567 */
2568 while ((table= li++))
2569 {
2570 table_map used_tables;
2571 table_map not_null_tables= (table_map) 0;
2572
2573 if ((nested_join= table->nested_join))
2574 {
2575 /*
2576 If the element of join_list is a nested join apply
2577 the procedure to its nested join list first.
2578 */
2579 if (table->join_cond())
2580 {
2581 Item *join_cond= table->join_cond();
2582 /*
2583 If a join condition JC is attached to the table,
2584 check all null rejected predicates in this condition.
2585 If such a predicate over an attribute belonging to
2586 an inner table of an embedded outer join is found,
2587 the outer join is converted to an inner join and
2588 the corresponding join condition is added to JC.
2589 */
2590 if (simplify_joins(join, &nested_join->join_list,
2591 join_cond, false, in_sj || table->sj_on_expr,
2592 &join_cond, changelog))
2593 DBUG_RETURN(true);
2594
2595 if (join_cond != table->join_cond())
2596 {
2597 DBUG_ASSERT(join_cond);
2598
2599 table->set_join_cond(join_cond);
2600 }
2601 }
2602 nested_join->used_tables= (table_map) 0;
2603 nested_join->not_null_tables=(table_map) 0;
2604 if (simplify_joins(join, &nested_join->join_list, conds, top,
2605 in_sj || table->sj_on_expr, &conds, changelog))
2606 DBUG_RETURN(true);
2607 used_tables= nested_join->used_tables;
2608 not_null_tables= nested_join->not_null_tables;
2609 }
2610 else
2611 {
2612 used_tables= table->table->map;
2613 if (conds)
2614 not_null_tables= conds->not_null_tables();
2615 }
2616
2617 if (table->embedding)
2618 {
2619 table->embedding->nested_join->used_tables|= used_tables;
2620 table->embedding->nested_join->not_null_tables|= not_null_tables;
2621 }
2622
2623 if (!table->outer_join || (used_tables & not_null_tables))
2624 {
2625 /*
2626 For some of the inner tables there are conjunctive predicates
2627 that reject nulls => the outer join can be replaced by an inner join.
2628 */
2629 if (table->outer_join)
2630 {
2631 *changelog|= OUTER_JOIN_TO_INNER;
2632 table->outer_join= 0;
2633 }
2634 if (table->join_cond())
2635 {
2636 *changelog|= JOIN_COND_TO_WHERE;
2637 /* Add join condition to the WHERE or upper-level join condition. */
2638 if (conds)
2639 {
2640 Item_cond_and *new_cond=
2641 static_cast<Item_cond_and*>(and_conds(conds, table->join_cond()));
2642 if (!new_cond)
2643 DBUG_RETURN(true);
2644 conds= new_cond;
2645 conds->top_level_item();
2646 /*
2647 conds is always a new item as both the upper-level condition and a
2648 join condition existed
2649 */
2650 DBUG_ASSERT(!conds->fixed);
2651 if (conds->fix_fields(join->thd, &conds))
2652 DBUG_RETURN(true);
2653
2654 /* If join condition has a pending rollback in THD::change_list */
2655 List_iterator<Item> lit(*new_cond->argument_list());
2656 Item *arg;
2657 while ((arg= lit++))
2658 {
2659 /*
2660 The join condition isn't necessarily the second argument anymore,
2661 since fix_fields may have merged it into an existing AND expr.
2662 */
2663 if (arg == table->join_cond())
2664 join->thd->
2665 change_item_tree_place(table->join_cond_ref(), lit.ref());
2666 }
2667 }
2668 else
2669 {
2670 conds= table->join_cond();
2671 /* If join condition has a pending rollback in THD::change_list */
2672 join->thd->change_item_tree_place(table->join_cond_ref(), &conds);
2673 }
2674 table->set_join_cond(NULL);
2675 }
2676 }
2677
2678 if (!top)
2679 continue;
2680
2681 /*
2682 Only inner tables of non-convertible outer joins remain with
2683 the join condition.
2684 */
2685 if (table->join_cond())
2686 {
2687 table->dep_tables|= table->join_cond()->used_tables();
2688 if (table->embedding)
2689 {
2690 table->dep_tables&= ~table->embedding->nested_join->used_tables;
2691
2692 // Embedding table depends on tables used in embedded join conditions.
2693 table->embedding->on_expr_dep_tables|=
2694 table->join_cond()->used_tables();
2695 }
2696 else
2697 table->dep_tables&= ~table->table->map;
2698 }
2699
2700 if (prev_table)
2701 {
2702 /* The order of tables is reverse: prev_table follows table */
2703 if (prev_table->straight || straight_join)
2704 prev_table->dep_tables|= used_tables;
2705 if (prev_table->join_cond())
2706 {
2707 prev_table->dep_tables|= table->on_expr_dep_tables;
2708 table_map prev_used_tables= prev_table->nested_join ?
2709 prev_table->nested_join->used_tables :
2710 prev_table->table->map;
2711 /*
2712 If join condition contains only references to inner tables
2713 we still make the inner tables dependent on the outer tables.
2714 It would be enough to set dependency only on one outer table
2715 for them. Yet this is really a rare case.
2716 Note:
2717 RAND_TABLE_BIT mask should not be counted as it
2718 prevents update of inner table dependences.
2719 For example it might happen if RAND() function
2720 is used in JOIN ON clause.
2721 */
2722 if (!((prev_table->join_cond()->used_tables() & ~RAND_TABLE_BIT) &
2723 ~prev_used_tables))
2724 prev_table->dep_tables|= used_tables;
2725 }
2726 }
2727 prev_table= table;
2728 }
2729
2730 /*
2731 Flatten nested joins that can be flattened.
2732 no join condition and not a semi-join => can be flattened.
2733 */
2734 li.rewind();
2735 while ((table= li++))
2736 {
2737 nested_join= table->nested_join;
2738 if (table->sj_on_expr && !in_sj)
2739 {
2740 /*
2741 If this is a semi-join that is not contained within another semi-join,
2742 leave it intact (otherwise it is flattened)
2743 */
2744 *changelog|= SEMIJOIN;
2745 }
2746 else if (nested_join && !table->join_cond())
2747 {
2748 *changelog|= PAREN_REMOVAL;
2749 TABLE_LIST *tbl;
2750 List_iterator<TABLE_LIST> it(nested_join->join_list);
2751 while ((tbl= it++))
2752 {
2753 tbl->embedding= table->embedding;
2754 tbl->join_list= table->join_list;
2755 tbl->dep_tables|= table->dep_tables;
2756 }
2757 li.replace(nested_join->join_list);
2758 }
2759 }
2760 *new_conds= conds;
2761
2762 if (changes)
2763 {
2764 Opt_trace_context * trace= &join->thd->opt_trace;
2765 if (unlikely(trace->is_started()))
2766 {
2767 Opt_trace_object trace_wrapper(trace);
2768 Opt_trace_object trace_object(trace, "transformations_to_nested_joins");
2769 {
2770 Opt_trace_array trace_changes(trace, "transformations");
2771 if (changes & SEMIJOIN)
2772 trace_changes.add_alnum("semijoin");
2773 if (changes & OUTER_JOIN_TO_INNER)
2774 trace_changes.add_alnum("outer_join_to_inner_join");
2775 if (changes & JOIN_COND_TO_WHERE)
2776 trace_changes.add_alnum("JOIN_condition_to_WHERE");
2777 if (changes & PAREN_REMOVAL)
2778 trace_changes.add_alnum("parenthesis_removal");
2779 }
2780 // the newly transformed query is worth printing
2781 opt_trace_print_expanded_query(join->thd, join->select_lex,
2782 &trace_object);
2783 }
2784 }
2785 DBUG_RETURN(false);
2786 }
2787
2788
2789 /**
2790 Record join nest info in the select block.
2791
2792 After simplification of inner join, outer join and semi-join structures:
2793 - record the remaining semi-join structures in the enclosing query block.
2794 - record transformed join conditions in TABLE_LIST objects.
2795
2796 This function is called recursively for each join nest and/or table
2797 in the query block.
2798
2799 @param select The query block
2800 @param tables List of tables and join nests
2801
2802 @return False if successful, True if failure
2803 */
2804
record_join_nest_info(st_select_lex * select,List<TABLE_LIST> * tables)2805 static bool record_join_nest_info(st_select_lex *select,
2806 List<TABLE_LIST> *tables)
2807
2808 {
2809 TABLE_LIST *table;
2810 List_iterator<TABLE_LIST> li(*tables);
2811 DBUG_ENTER("record_join_nest_info");
2812
2813 while ((table= li++))
2814 {
2815 table->prep_join_cond= table->join_cond() ?
2816 table->join_cond()->copy_andor_structure(select->join->thd, true) : NULL;
2817
2818 if (table->nested_join == NULL)
2819 continue;
2820
2821 if (record_join_nest_info(select, &table->nested_join->join_list))
2822 DBUG_RETURN(true);
2823 /*
2824 sj_inner_tables is set properly later in pull_out_semijoin_tables().
2825 This assignment is required in case pull_out_semijoin_tables()
2826 is not called.
2827 */
2828 if (table->sj_on_expr)
2829 table->sj_inner_tables= table->nested_join->used_tables;
2830 if (table->sj_on_expr && select->sj_nests.push_back(table))
2831 DBUG_RETURN(true);
2832 }
2833 DBUG_RETURN(false);
2834 }
2835
2836
2837 /**
2838 Assign each nested join structure a bit in nested_join_map.
2839
2840 @param join_list List of tables
2841 @param first_unused Number of first unused bit in nested_join_map before the
2842 call
2843
2844 @note
2845 This function is called after simplify_joins(), when there are no
2846 redundant nested joins.
2847 We cannot have more nested joins in a query block than there are tables,
2848 so as long as the number of bits in nested_join_map is not less than the
2849 maximum number of tables in a query block, nested_join_map can never
2850 overflow.
2851
2852 @return
2853 First unused bit in nested_join_map after the call.
2854 */
2855
build_bitmap_for_nested_joins(List<TABLE_LIST> * join_list,uint first_unused)2856 static uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list,
2857 uint first_unused)
2858 {
2859 List_iterator<TABLE_LIST> li(*join_list);
2860 TABLE_LIST *table;
2861 DBUG_ENTER("build_bitmap_for_nested_joins");
2862 while ((table= li++))
2863 {
2864 NESTED_JOIN *nested_join;
2865 if ((nested_join= table->nested_join))
2866 {
2867 // We should have either a join condition or a semi-join condition
2868 DBUG_ASSERT((table->join_cond() == NULL) == (table->sj_on_expr != NULL));
2869
2870 nested_join->nj_map= 0;
2871 nested_join->nj_total= 0;
2872 /*
2873 We only record nested join information for outer join nests.
2874 Tables belonging in semi-join nests are recorded in the
2875 embedding outer join nest, if one exists.
2876 */
2877 if (table->join_cond())
2878 {
2879 DBUG_ASSERT(first_unused < sizeof(nested_join_map)*8);
2880 nested_join->nj_map= (nested_join_map) 1 << first_unused++;
2881 nested_join->nj_total= nested_join->join_list.elements;
2882 }
2883 else if (table->sj_on_expr)
2884 {
2885 NESTED_JOIN *const outer_nest=
2886 table->embedding ? table->embedding->nested_join : NULL;
2887 /*
2888 The semi-join nest has already been counted into the table count
2889 for the outer join nest as one table, so subtract 1 from the
2890 table count.
2891 */
2892 if (outer_nest)
2893 outer_nest->nj_total+= (nested_join->join_list.elements - 1);
2894 }
2895 else
2896 DBUG_ASSERT(false);
2897
2898 first_unused= build_bitmap_for_nested_joins(&nested_join->join_list,
2899 first_unused);
2900 }
2901 }
2902 DBUG_RETURN(first_unused);
2903 }
2904
2905
2906 /** Update the dependency map for the tables. */
2907
update_depend_map(JOIN * join)2908 void update_depend_map(JOIN *join)
2909 {
2910 for (uint tableno = 0; tableno < join->tables; tableno++)
2911 {
2912 JOIN_TAB *const join_tab= join->join_tab + tableno;
2913 TABLE_REF *const ref= &join_tab->ref;
2914 table_map depend_map=0;
2915 Item **item=ref->items;
2916 uint i;
2917 for (i=0 ; i < ref->key_parts ; i++,item++)
2918 depend_map|=(*item)->used_tables();
2919 depend_map&= ~PSEUDO_TABLE_BITS;
2920 ref->depend_map= depend_map;
2921 for (JOIN_TAB **tab=join->map2table;
2922 depend_map ;
2923 tab++,depend_map>>=1 )
2924 {
2925 if (depend_map & 1)
2926 ref->depend_map|=(*tab)->ref.depend_map;
2927 }
2928 }
2929 }
2930
2931
2932 /** Update the dependency map for the sort order. */
2933
update_depend_map(JOIN * join,ORDER * order)2934 static void update_depend_map(JOIN *join, ORDER *order)
2935 {
2936 for (; order ; order=order->next)
2937 {
2938 table_map depend_map;
2939 order->item[0]->update_used_tables();
2940 order->depend_map= depend_map=
2941 order->item[0]->used_tables() & ~PARAM_TABLE_BIT;
2942 order->used= 0;
2943 // Not item_sum(), RAND() and no reference to table outside of sub select
2944 if (!(order->depend_map & (OUTER_REF_TABLE_BIT | RAND_TABLE_BIT))
2945 && !order->item[0]->with_sum_func)
2946 {
2947 for (JOIN_TAB **tab=join->map2table;
2948 depend_map ;
2949 tab++, depend_map>>=1)
2950 {
2951 if (depend_map & 1)
2952 order->depend_map|=(*tab)->ref.depend_map;
2953 }
2954 }
2955 }
2956 }
2957
2958
2959 /**
2960 Update equalities and keyuse references after semi-join materialization
2961 strategy is chosen.
2962
2963 @details
2964 For each multiple equality that contains a field that is selected
2965 from a subquery, and that subquery is executed using a semi-join
2966 materialization strategy, add the corresponding column in the materialized
2967 temporary table to the equality.
2968 For each injected semi-join equality that is not converted to
2969 multiple equality, replace the reference to the expression selected
2970 from the subquery with the corresponding column in the temporary table.
2971
2972 This is needed to properly reflect the equalities that involve injected
2973 semi-join equalities when materialization strategy is chosen.
2974 @see eliminate_item_equal() for how these equalities are used to generate
2975 correct equality predicates.
2976
2977 The MaterializeScan semi-join strategy requires some additional processing:
2978 All primary tables after the materialized temporary table must be inspected
2979 for keyuse objects that point to expressions from the subquery tables.
2980 These references must be replaced with references to corresponding columns
2981 in the materialized temporary table instead. Those primary tables using
2982 ref access will thus be made to depend on the materialized temporary table
2983 instead of the subquery tables.
2984
2985 Only the injected semi-join equalities need this treatment, other predicates
2986 will be handled correctly by the regular item substitution process.
2987
2988 @return False if success, true if error
2989 */
2990
update_equalities_for_sjm()2991 bool JOIN::update_equalities_for_sjm()
2992 {
2993 List_iterator<Semijoin_mat_exec> it(sjm_exec_list);
2994 Semijoin_mat_exec *sjm_exec;
2995 while ((sjm_exec= it++))
2996 {
2997 TABLE_LIST *const sj_nest= sjm_exec->sj_nest;
2998
2999 DBUG_ASSERT(!sj_nest->outer_join_nest());
3000 /*
3001 A materialized semi-join nest cannot actually be an inner part of an
3002 outer join yet, this is just a preparatory step,
3003 ie sj_nest->outer_join_nest() is always NULL here.
3004 @todo: Enable outer joining here later.
3005 */
3006 Item *cond= sj_nest->outer_join_nest() ?
3007 sj_nest->outer_join_nest()->join_cond() :
3008 conds;
3009 if (!cond)
3010 continue;
3011
3012 uchar *dummy= NULL;
3013 cond= cond->compile(&Item::equality_substitution_analyzer, &dummy,
3014 &Item::equality_substitution_transformer,
3015 (uchar *)sj_nest);
3016 if (cond == NULL)
3017 return true;
3018
3019 cond->update_used_tables();
3020
3021 // Loop over all primary tables that follow the materialized table
3022 for (uint j= sjm_exec->mat_table_index + 1; j < primary_tables; j++)
3023 {
3024 JOIN_TAB *const tab= join_tab + j;
3025 for (Key_use *keyuse= tab->position->key;
3026 keyuse && keyuse->table == tab->table &&
3027 keyuse->key == tab->position->key->key;
3028 keyuse++)
3029 {
3030 List_iterator<Item> it(sj_nest->nested_join->sj_inner_exprs);
3031 Item *old;
3032 uint fieldno= 0;
3033 while ((old= it++))
3034 {
3035 if (old->real_item()->eq(keyuse->val->real_item(), false))
3036 {
3037 /*
3038 Replace the expression selected from the subquery with the
3039 corresponding column of the materialized temporary table.
3040 */
3041 keyuse->val= sj_nest->nested_join->sjm.mat_fields[fieldno];
3042 keyuse->used_tables= keyuse->val->used_tables();
3043 break;
3044 }
3045 fieldno++;
3046 }
3047 }
3048 }
3049 }
3050
3051 return false;
3052 }
3053
3054
3055 /**
3056 Assign set of available (prefix) tables to all tables in query block.
3057 Also set added tables, ie the tables added in each JOIN_TAB compared to the
3058 previous JOIN_TAB.
3059 This function must be called for every query block after the table order
3060 has been determined.
3061 */
3062
set_prefix_tables()3063 void JOIN::set_prefix_tables()
3064 {
3065 DBUG_ASSERT(!plan_is_const());
3066 /*
3067 The const tables are available together with the first non-const table in
3068 the join order.
3069 */
3070 table_map const initial_tables_map= const_table_map |
3071 (allow_outer_refs ? OUTER_REF_TABLE_BIT : 0);
3072
3073 table_map current_tables_map= initial_tables_map;
3074 table_map prev_tables_map= (table_map) 0;
3075 table_map saved_tables_map= (table_map) 0;
3076
3077 JOIN_TAB *last_non_sjm_tab= NULL; // Track the last non-sjm table
3078
3079 for (uint i= const_tables; i < tables; i++)
3080 {
3081 JOIN_TAB *const tab= join_tab + i;
3082 if (!tab->table)
3083 continue;
3084 /*
3085 Tables that are within SJ-Materialization nests cannot have their
3086 conditions referring to preceding non-const tables.
3087 - If we're looking at the first SJM table, reset current_tables_map
3088 to refer to only allowed tables
3089 @see Item_equal::get_subst_item()
3090 @see eliminate_item_equal()
3091 */
3092 if (sj_is_materialize_strategy(tab->get_sj_strategy()))
3093 {
3094 const table_map sjm_inner_tables= tab->emb_sj_nest->sj_inner_tables;
3095 if (!(sjm_inner_tables & current_tables_map))
3096 {
3097 saved_tables_map= current_tables_map;
3098 current_tables_map= initial_tables_map;
3099 prev_tables_map= (table_map) 0;
3100 }
3101
3102 current_tables_map|= tab->table->map;
3103 tab->set_prefix_tables(current_tables_map, prev_tables_map);
3104 prev_tables_map= current_tables_map;
3105
3106 if (!(sjm_inner_tables & ~current_tables_map))
3107 {
3108 // At the end of a semi-join materialization nest, restore previous map
3109 current_tables_map= saved_tables_map;
3110 prev_tables_map= last_non_sjm_tab ?
3111 last_non_sjm_tab->prefix_tables() : (table_map) 0;
3112 }
3113 }
3114 else
3115 {
3116 last_non_sjm_tab= tab;
3117 current_tables_map|= tab->table->map;
3118 tab->set_prefix_tables(current_tables_map, prev_tables_map);
3119 prev_tables_map= current_tables_map;
3120 }
3121 }
3122 /*
3123 Random expressions must be added to the last table's condition.
3124 It solves problem with queries like SELECT * FROM t1 WHERE rand() > 0.5
3125 */
3126 if (last_non_sjm_tab != NULL)
3127 last_non_sjm_tab->add_prefix_tables(RAND_TABLE_BIT);
3128 }
3129
3130
3131 /**
3132 Calculate best possible join order and initialize the join structure.
3133
3134 @param join Join object that is populated with statistics data
3135 @param tables_arg List of tables that is referenced by this query
3136 @param conds Where condition of query
3137 @param keyuse_array[out] Populated with key_use information
3138 @param first_optimization True if first optimization of this query
3139
  @return false if success, true if error
3141
3142 @details
3143 Here is an overview of the logic of this function:
3144
3145 - Initialize JOIN data structures and setup basic dependencies between tables.
3146
3147 - Update dependencies based on join information.
3148
3149 - Make key descriptions (update_ref_and_keys()).
3150
3151 - Pull out semi-join tables based on table dependencies.
3152
3153 - Extract tables with zero or one rows as const tables.
3154
3155 - Read contents of const tables, substitute columns from these tables with
3156 actual data. Also keep track of empty tables vs. one-row tables.
3157
3158 - After const table extraction based on row count, more tables may
3159 have become functionally dependent. Extract these as const tables.
3160
3161 - Add new sargable predicates based on retrieved const values.
3162
3163 - Calculate number of rows to be retrieved from each table.
3164
3165 - Calculate cost of potential semi-join materializations.
3166
3167 - Calculate best possible join order based on available statistics.
3168
3169 - Fill in remaining information for the generated join order.
3170 */
3171
3172 static bool
make_join_statistics(JOIN * join,TABLE_LIST * tables_arg,Item * conds,Key_use_array * keyuse_array,bool first_optimization)3173 make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, Item *conds,
3174 Key_use_array *keyuse_array, bool first_optimization)
3175 {
3176 int error;
3177 THD *const thd= join->thd;
3178 TABLE_LIST *tables= tables_arg;
3179 uint i,const_count,key;
3180 const uint table_count= join->tables;
3181 table_map found_ref, refs;
3182 JOIN_TAB *stat,*stat_end,*s,**stat_ref;
3183 Key_use *keyuse, *start_keyuse;
3184 table_map outer_join= 0;
3185 SARGABLE_PARAM *sargables= 0;
3186 JOIN_TAB *stat_vector[MAX_TABLES+1];
3187 Opt_trace_context * const trace= &join->thd->opt_trace;
3188 DBUG_ENTER("make_join_statistics");
3189
3190 stat= new (thd->mem_root) JOIN_TAB[table_count];
3191 stat_ref= (JOIN_TAB**) thd->alloc(sizeof(JOIN_TAB*)*MAX_TABLES);
3192 if (!stat || !stat_ref)
3193 DBUG_RETURN(true);
3194
3195 if (!(join->positions=
3196 new (thd->mem_root) POSITION[table_count+1]))
3197 DBUG_RETURN(true);
3198
3199 // Up to one extra slot per semi-join nest is needed (if materialized)
3200 uint sj_nests= join->select_lex->sj_nests.elements;
3201 if (!(join->best_positions=
3202 new (thd->mem_root) POSITION[table_count + sj_nests + 1]))
3203 DBUG_RETURN(true);
3204
3205 join->best_ref= stat_vector;
3206
3207 stat_end= stat+table_count;
3208 join->const_table_map= 0;
3209 join->found_const_table_map= 0;
3210 join->all_table_map= 0;
3211 const_count= 0;
3212
3213 /*
3214 Initialize data structures for tables to be joined.
3215 Initialize dependencies between tables.
3216 */
3217 for (s= stat, i= 0;
3218 tables;
3219 s++, tables= tables->next_leaf, i++)
3220 {
3221 stat_vector[i]=s;
3222 TABLE *const table= tables->table;
3223 s->table= table;
3224 table->pos_in_table_list= tables;
3225 error= tables->fetch_number_of_rows();
3226
3227 DBUG_EXECUTE_IF("bug11747970_raise_error",
3228 {
3229 if (!error)
3230 {
3231 my_error(ER_UNKNOWN_ERROR, MYF(0));
3232 goto error;
3233 }
3234 });
3235
3236 if (error)
3237 {
3238 table->file->print_error(error, MYF(0));
3239 goto error;
3240 }
3241 table->quick_keys.clear_all();
3242 table->possible_quick_keys.clear_all();
3243 table->reginfo.join_tab=s;
3244 table->reginfo.not_exists_optimize=0;
3245 memset(table->const_key_parts, 0, sizeof(key_part_map)*table->s->keys);
3246 join->all_table_map|= table->map;
3247 s->join=join;
3248
3249 s->dependent= tables->dep_tables;
3250 if (tables->schema_table)
3251 table->file->stats.records= 2;
3252 table->quick_condition_rows= table->file->stats.records;
3253
3254 s->on_expr_ref= tables->join_cond_ref();
3255
3256 if (tables->outer_join_nest())
3257 {
3258 /* s belongs to a nested join, maybe to several embedding joins */
3259 s->embedding_map= 0;
3260 for (TABLE_LIST *embedding= tables->embedding;
3261 embedding;
3262 embedding= embedding->embedding)
3263 {
3264 NESTED_JOIN *nested_join= embedding->nested_join;
3265 s->embedding_map|=nested_join->nj_map;
3266 s->dependent|= embedding->dep_tables;
3267 outer_join|= nested_join->used_tables;
3268 }
3269 }
3270 else if (*s->on_expr_ref)
3271 {
3272 /* s is the only inner table of an outer join */
3273 outer_join|= table->map;
3274 s->embedding_map= 0;
3275 for (TABLE_LIST *embedding= tables->embedding;
3276 embedding;
3277 embedding= embedding->embedding)
3278 s->embedding_map|= embedding->nested_join->nj_map;
3279 }
3280 }
3281 stat_vector[i]=0;
3282 join->outer_join=outer_join;
3283
3284 if (join->outer_join)
3285 {
3286 /*
3287 Complete the dependency analysis.
3288 Build transitive closure for relation 'to be dependent on'.
3289 This will speed up the plan search for many cases with outer joins,
3290 as well as allow us to catch illegal cross references.
3291 Warshall's algorithm is used to build the transitive closure.
      As we may restart the outer loop up to 'table_count' times, the
      complexity of the algorithm is O((number of tables)^3).
      However, most of the iterations will be short-circuited when
      there are no dependencies to propagate.
3296 */
3297 for (i= 0 ; i < table_count ; i++)
3298 {
3299 TABLE *const table= stat[i].table;
3300
3301 if (!table->reginfo.join_tab->dependent)
3302 continue;
3303
3304 uint j;
3305 /* Add my dependencies to other tables depending on me */
3306 for (j= 0, s= stat ; j < table_count ; j++, s++)
3307 {
3308 if (s->dependent & table->map)
3309 {
3310 table_map was_dependent= s->dependent;
3311 s->dependent |= table->reginfo.join_tab->dependent;
3312 /*
3313 If we change dependencies for a table we already have
3314 processed: Redo dependency propagation from this table.
3315 */
3316 if (i > j && s->dependent != was_dependent)
3317 {
3318 i = j-1;
3319 break;
3320 }
3321 }
3322 }
3323 }
3324
3325 for (i= 0, s= stat ; i < table_count ; i++, s++)
3326 {
3327 /* Catch illegal cross references for outer joins */
3328 if (s->dependent & s->table->map)
3329 {
3330 join->tables=0; // Don't use join->table
3331 join->primary_tables= 0;
3332 my_message(ER_WRONG_OUTER_JOIN, ER(ER_WRONG_OUTER_JOIN), MYF(0));
3333 goto error;
3334 }
3335
3336 if (outer_join & s->table->map)
3337 s->table->maybe_null= 1;
3338 s->key_dependent= s->dependent;
3339 }
3340 }
3341
3342 if (unlikely(trace->is_started()))
3343 trace_table_dependencies(trace, stat, table_count);
3344
3345 if (conds || outer_join)
3346 if (update_ref_and_keys(thd, keyuse_array, stat, join->tables,
3347 conds, join->cond_equal,
3348 ~outer_join, join->select_lex, &sargables))
3349 goto error;
3350
3351 /*
3352 Pull out semi-join tables based on dependencies. Dependencies are valid
3353 throughout the lifetime of a query, so this operation can be performed
3354 on the first optimization only.
3355 */
3356 if (first_optimization && sj_nests)
3357 {
3358 if (pull_out_semijoin_tables(join))
3359 DBUG_RETURN(true);
3360 sj_nests= join->select_lex->sj_nests.elements;
3361 }
3362
3363 /*
3364 Extract const tables based on row counts, must be done for each execution.
3365 Tables containing exactly zero or one rows are marked as const, but
3366 notice the additional constraints checked below.
3367 Tables that are extracted have their rows read before actual execution
3368 starts and are placed in the beginning of the join_tab array.
3369 Thus, they do not take part in join order optimization process,
3370 which can significantly reduce the optimization time.
3371 The data read from these tables can also be regarded as "constant"
3372 throughout query execution, hence the column values can be used for
3373 additional constant propagation and extraction of const tables based
3374 on eq-ref properties.
3375 */
3376 enum enum_const_table_extraction
3377 {
3378 extract_no_table= 0,
3379 extract_empty_table= 1,
3380 extract_const_table= 2
3381 };
3382
3383 if (join->no_const_tables)
3384 goto const_table_extraction_done;
3385
3386 for (i= 0, s= stat; i < table_count; i++, s++)
3387 {
3388 TABLE *const table= s->table;
3389 TABLE_LIST *const tables= table->pos_in_table_list;
3390 enum enum_const_table_extraction extract_method= extract_const_table;
3391
3392 #ifdef WITH_PARTITION_STORAGE_ENGINE
3393 const bool all_partitions_pruned_away= table->all_partitions_pruned_away;
3394 #else
3395 const bool all_partitions_pruned_away= false;
3396 #endif
3397
3398 if (tables->outer_join_nest())
3399 {
3400 /*
3401 Table belongs to a nested join, no candidate for const table extraction.
3402 */
3403 extract_method= extract_no_table;
3404 }
3405 else if (tables->embedding && tables->embedding->sj_on_expr)
3406 {
3407 /*
3408 Table belongs to a semi-join.
3409 We do not currently pull out const tables from semi-join nests.
3410 */
3411 extract_method= extract_no_table;
3412 }
3413 else if (*s->on_expr_ref)
3414 {
3415 /* s is the only inner table of an outer join, extract empty tables */
3416 extract_method= extract_empty_table;
3417 }
3418 switch (extract_method)
3419 {
3420 case extract_no_table:
3421 break;
3422
3423 case extract_empty_table:
3424 /* Extract tables with zero rows, but only if statistics are exact */
3425 if ((table->file->stats.records == 0 ||
3426 all_partitions_pruned_away) &&
3427 (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT))
3428 set_position(join, const_count++, s, NULL);
3429 break;
3430
3431 case extract_const_table:
3432 /*
3433 Extract tables with zero or one rows, but do not extract tables that
3434 1. are dependent upon other tables, or
3435 2. have no exact statistics, or
3436 3. are full-text searched
3437 */
3438 if ((table->s->system ||
3439 table->file->stats.records <= 1 ||
3440 all_partitions_pruned_away) &&
3441 !s->dependent && // 1
3442 (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) && // 2
3443 !table->fulltext_searched) // 3
3444 set_position(join, const_count++, s, NULL);
3445 break;
3446 }
3447 }
3448 /* Read const tables (tables matching no more than 1 rows) */
3449
3450 for (POSITION *p_pos=join->positions, *p_end=p_pos+const_count;
3451 p_pos < p_end ;
3452 p_pos++)
3453 {
3454 int tmp;
3455 s= p_pos->table;
3456 s->type=JT_SYSTEM;
3457 join->const_table_map|=s->table->map;
3458 if ((tmp=join_read_const_table(s, p_pos)))
3459 {
3460 if (tmp > 0)
3461 goto error; // Fatal error
3462 }
3463 else
3464 {
3465 join->found_const_table_map|= s->table->map;
3466 s->table->pos_in_table_list->optimized_away= TRUE;
3467 }
3468 }
3469
3470 const_table_extraction_done:
3471 /* loop until no more const tables are found */
3472 int ref_changed;
3473 do
3474 {
3475 more_const_tables_found:
3476 ref_changed = 0;
3477 found_ref=0;
3478
3479 /*
3480 We only have to loop from stat_vector + const_count as
3481 set_position() will move all const_tables first in stat_vector
3482 */
3483
3484 for (JOIN_TAB **pos=stat_vector+const_count ; (s= *pos) ; pos++)
3485 {
3486 TABLE *const table= s->table;
3487 TABLE_LIST *const tl= table->pos_in_table_list;
3488 /*
3489 If equi-join condition by a key is null rejecting and after a
3490 substitution of a const table the key value happens to be null
3491 then we can state that there are no matches for this equi-join.
3492 */
3493 if ((keyuse= s->keyuse) && *s->on_expr_ref && !s->embedding_map)
3494 {
3495 /*
3496 When performing an outer join operation if there are no matching rows
3497 for the single row of the outer table all the inner tables are to be
3498 null complemented and thus considered as constant tables.
3499 Here we apply this consideration to the case of outer join operations
3500 with a single inner table only because the case with nested tables
3501 would require a more thorough analysis.
3502 TODO. Apply single row substitution to null complemented inner tables
3503 for nested outer join operations.
3504 */
3505 while (keyuse->table == table)
3506 {
3507 if (!(keyuse->val->used_tables() & ~join->const_table_map) &&
3508 keyuse->val->is_null() && keyuse->null_rejecting)
3509 {
3510 s->type= JT_CONST;
3511 mark_as_null_row(table);
3512 join->found_const_table_map|= table->map;
3513 join->const_table_map|= table->map;
3514 set_position(join, const_count++, s, NULL);
3515 goto more_const_tables_found;
3516 }
3517 keyuse++;
3518 }
3519 }
3520
3521 if (s->dependent) // If dependent on some table
3522 {
3523 // All dep. must be constants
3524 if (s->dependent & ~(join->const_table_map))
3525 continue;
3526 /*
3527 Mark a dependent table as constant if
3528 1. it has exactly zero or one rows (it is a system table), and
3529 2. it is not within a nested outer join, and
3530 3. it does not have an expensive outer join condition.
3531 This is because we have to determine whether an outer-joined table
3532 has a real row or a null-extended row in the optimizer phase.
3533 We have no possibility to evaluate its join condition at
3534 execution time, when it is marked as a system table.
3535 */
3536 if (table->file->stats.records <= 1L && // 1
3537 (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) && // 1
3538 !tl->outer_join_nest() && // 2
3539 !(*s->on_expr_ref && (*s->on_expr_ref)->is_expensive())) // 3
3540 { // system table
3541 int tmp= 0;
3542 s->type=JT_SYSTEM;
3543 join->const_table_map|=table->map;
3544 set_position(join, const_count++, s, NULL);
3545 if ((tmp= join_read_const_table(s, join->positions+const_count-1)))
3546 {
3547 if (tmp > 0)
3548 goto error; // Fatal error
3549 }
3550 else
3551 join->found_const_table_map|= table->map;
3552 continue;
3553 }
3554 }
3555 /* check if table can be read by key or table only uses const refs */
3556 if ((keyuse=s->keyuse))
3557 {
3558 s->type= JT_REF;
3559 while (keyuse->table == table)
3560 {
3561 start_keyuse=keyuse;
3562 key=keyuse->key;
3563 s->keys.set_bit(key); // QQ: remove this ?
3564
3565 refs=0;
3566 key_map const_ref, eq_part;
3567 do
3568 {
3569 if (keyuse->val->type() != Item::NULL_ITEM && !keyuse->optimize)
3570 {
3571 if (!((~join->found_const_table_map) & keyuse->used_tables))
3572 const_ref.set_bit(keyuse->keypart);
3573 else
3574 refs|=keyuse->used_tables;
3575 eq_part.set_bit(keyuse->keypart);
3576 }
3577 keyuse++;
3578 } while (keyuse->table == table && keyuse->key == key);
3579
3580 /*
3581 Extract const tables with proper key dependencies.
3582 Exclude tables that
3583 1. are full-text searched, or
3584 2. are part of nested outer join, or
3585 3. are part of semi-join, or
3586 4. have an expensive outer join condition.
3587 5. are blocked by handler for const table optimize.
3588 */
3589 if (eq_part.is_prefix(table->key_info[key].user_defined_key_parts) &&
3590 !table->fulltext_searched && // 1
3591 !tl->outer_join_nest() && // 2
3592 !(tl->embedding && tl->embedding->sj_on_expr) && // 3
3593 !(*s->on_expr_ref && (*s->on_expr_ref)->is_expensive()) &&// 4
3594 !(table->file->ha_table_flags() & HA_BLOCK_CONST_TABLE)) // 5
3595 {
3596 if (table->key_info[key].flags & HA_NOSAME)
3597 {
3598 if (const_ref == eq_part)
3599 { // Found everything for ref.
3600 int tmp;
3601 ref_changed = 1;
3602 s->type= JT_CONST;
3603 join->const_table_map|=table->map;
3604 set_position(join,const_count++,s,start_keyuse);
3605 if (create_ref_for_key(join, s, start_keyuse,
3606 join->found_const_table_map))
3607 goto error;
3608 if ((tmp=join_read_const_table(s,
3609 join->positions+const_count-1)))
3610 {
3611 if (tmp > 0)
3612 goto error; // Fatal error
3613 }
3614 else
3615 join->found_const_table_map|= table->map;
3616 break;
3617 }
3618 else
3619 found_ref|= refs; // Table is const if all refs are const
3620 }
3621 else if (const_ref == eq_part)
3622 s->const_keys.set_bit(key);
3623 }
3624 }
3625 }
3626 }
3627 } while (join->const_table_map & found_ref && ref_changed);
3628
3629 /*
    Update info on indexes that can be used for search lookups as
    reading const tables may have added new sargable predicates.
3632 */
3633 if (const_count && sargables)
3634 {
3635 for( ; sargables->field ; sargables++)
3636 {
3637 Field *field= sargables->field;
3638 JOIN_TAB *join_tab= field->table->reginfo.join_tab;
3639 key_map possible_keys= field->key_start;
3640 possible_keys.intersect(field->table->keys_in_use_for_query);
3641 bool is_const= 1;
3642 for (uint j=0; j < sargables->num_values; j++)
3643 is_const&= sargables->arg_value[j]->const_item();
3644 if (is_const)
3645 {
3646 join_tab->const_keys.merge(possible_keys);
3647 join_tab->keys.merge(possible_keys);
3648 }
3649 }
3650 }
3651
3652 {
3653 Opt_trace_object trace_wrapper(trace);
3654 /* Calc how many (possible) matched records in each table */
3655 Opt_trace_array trace_records(trace, "rows_estimation");
3656
3657 for (s= stat ; s < stat_end ; s++)
3658 {
3659 Opt_trace_object trace_table(trace);
3660 trace_table.add_utf8_table(s->table);
3661 if (s->type == JT_SYSTEM || s->type == JT_CONST)
3662 {
3663 trace_table.add("rows", 1).add("cost", 1)
3664 .add_alnum("table_type", (s->type == JT_SYSTEM) ? "system": "const")
3665 .add("empty", static_cast<bool>(s->table->null_row));
3666
3667 /* Only one matching row */
3668 s->found_records= s->records= s->read_time=1; s->worst_seeks= 1.0;
3669 continue;
3670 }
3671 /* Approximate found rows and time to read them */
3672 s->found_records= s->records= s->table->file->stats.records;
3673 s->read_time= (ha_rows) s->table->file->scan_time();
3674
3675 /*
        Set a max range of how many seeks we can expect when using keys.
        This can't be too high, as otherwise we are likely to use
        table scan.
3679 */
3680 s->worst_seeks= min((double) s->found_records / 10,
3681 (double) s->read_time * 3);
3682 if (s->worst_seeks < 2.0) // Fix for small tables
3683 s->worst_seeks= 2.0;
3684
3685 /*
3686 Add to stat->const_keys those indexes for which all group fields or
3687 all select distinct fields participate in one index.
3688 */
3689 add_group_and_distinct_keys(join, s);
3690
3691 /*
3692 Perform range analysis if there are keys it could use (1).
3693 Don't do range analysis if on the inner side of an outer join (2).
3694 Do range analysis if on the inner side of a semi-join (3).
3695 */
3696 TABLE_LIST *const tl= s->table->pos_in_table_list;
3697 if (!s->const_keys.is_clear_all() && // (1)
3698 (!tl->embedding || // (2)
3699 (tl->embedding && tl->embedding->sj_on_expr))) // (3)
3700 {
3701 ha_rows records;
3702 SQL_SELECT *select;
3703 select= make_select(s->table, join->found_const_table_map,
3704 join->found_const_table_map,
3705 *s->on_expr_ref ? *s->on_expr_ref : conds,
3706 1, &error);
3707 if (!select)
3708 goto error;
3709 records= get_quick_record_count(thd, select, s->table,
3710 &s->const_keys, join->row_limit);
3711
3712 if (records == 0 && thd->is_fatal_error)
3713 DBUG_RETURN(true);
3714
3715 s->quick= select->quick;
3716 s->needed_reg= select->needed_reg;
3717 select->quick= 0;
3718 /*
3719 Check for "impossible range", but make sure that we do not attempt
3720 to mark semi-joined tables as "const" (only semi-joined tables that
3721 are functionally dependent can be marked "const", and subsequently
3722 pulled out of their semi-join nests).
3723 */
3724 if (records == 0 &&
3725 s->table->reginfo.impossible_range &&
3726 (!(tl->embedding && tl->embedding->sj_on_expr)))
3727 {
3728 /*
          Impossible WHERE or ON expression.
          In case of ON, we mark that we match one empty NULL row.
          In case of WHERE, don't set found_const_table_map, to get the
          caller to abort with a zero row result.
3733 */
3734 join->const_table_map|= s->table->map;
3735 set_position(join, const_count++, s, NULL);
3736 s->type= JT_CONST;
3737 if (*s->on_expr_ref)
3738 {
3739 /* Generate empty row */
3740 s->info= ET_IMPOSSIBLE_ON_CONDITION;
3741 trace_table.add("returning_empty_null_row", true).
3742 add_alnum("cause", "impossible_on_condition");
3743 join->found_const_table_map|= s->table->map;
3744 s->type= JT_CONST;
3745 mark_as_null_row(s->table); // All fields are NULL
3746 }
3747 else
3748 {
3749 trace_table.add("rows", 0).
3750 add_alnum("cause", "impossible_where_condition");
3751 }
3752 }
3753 if (records != HA_POS_ERROR)
3754 {
3755 s->found_records= records;
3756 s->read_time= (ha_rows) (s->quick ? s->quick->read_time : 0.0);
3757 }
3758 delete select;
3759 }
3760 else
3761 Opt_trace_object(trace, "table_scan").
3762 add("rows", s->found_records).
3763 add("cost", s->read_time);
3764 }
3765 }
3766
3767 join->join_tab=stat;
3768 join->map2table=stat_ref;
3769 join->const_tables=const_count;
3770
3771 if (sj_nests)
3772 join->set_semijoin_embedding();
3773
3774 if (!join->plan_is_const())
3775 optimize_keyuse(join, keyuse_array);
3776
3777 join->allow_outer_refs= true;
3778
3779 if (sj_nests && optimize_semijoin_nests_for_materialization(join))
3780 DBUG_RETURN(true);
3781
3782 if (Optimize_table_order(thd, join, NULL).choose_table_order())
3783 DBUG_RETURN(true);
3784
3785 DBUG_EXECUTE_IF("bug13820776_1", thd->killed= THD::KILL_QUERY;);
3786 if (thd->killed || thd->is_error())
3787 DBUG_RETURN(true);
3788
3789 if (join->unit->item && join->decide_subquery_strategy())
3790 DBUG_RETURN(true);
3791
3792 join->refine_best_rowcount();
3793
3794 // Only best_positions should be needed from now on.
3795 join->positions= NULL;
3796 join->best_ref= NULL;
3797
3798 /*
3799 Store the cost of this query into a user variable
3800 Don't update last_query_cost for statements that are not "flat joins" :
3801 i.e. they have subqueries, unions or call stored procedures.
3802 TODO: calculate a correct cost for a query with subqueries and UNIONs.
3803 */
3804 if (thd->lex->is_single_level_stmt())
3805 thd->status_var.last_query_cost= join->best_read;
3806
3807 /* Generate an execution plan from the found optimal join order. */
3808 if (join->get_best_combination())
3809 DBUG_RETURN(true);
3810
3811 // No need for this struct after new JOIN_TAB array is set up.
3812 join->best_positions= NULL;
3813
3814 /* Some called function may still set thd->is_fatal_error unnoticed */
3815 if (thd->is_fatal_error)
3816 DBUG_RETURN(true);
3817
3818 DBUG_RETURN(false);
3819
3820 error:
3821 /*
3822 Need to clean up join_tab from TABLEs in case of error.
3823 They won't get cleaned up by JOIN::cleanup() because JOIN::join_tab
3824 may not be assigned yet by this function (which is building join_tab).
3825 Dangling TABLE::reginfo.join_tab may cause part_of_refkey to choke.
3826 */
3827 for (tables= tables_arg; tables; tables= tables->next_leaf)
3828 tables->table->reginfo.join_tab= NULL;
3829 DBUG_RETURN(true);
3830 }
3831
3832
3833 /**
3834 Set semi-join embedding join nest pointers.
3835
3836 Set pointer to embedding semi-join nest for all semi-joined tables.
3837 Note that this must be done for every table inside all semi-join nests,
3838 even for tables within outer join nests embedded in semi-join nests.
3839 A table can never be part of multiple semi-join nests, hence no
3840 ambiguities can ever occur.
3841 Note also that the pointer is not set for TABLE_LIST objects that
3842 are outer join nests within semi-join nests.
3843 */
3844
set_semijoin_embedding()3845 void JOIN::set_semijoin_embedding()
3846 {
3847 DBUG_ASSERT(!select_lex->sj_nests.is_empty());
3848
3849 JOIN_TAB *const tab_end= join_tab + primary_tables;
3850
3851 for (JOIN_TAB *tab= join_tab; tab < tab_end; tab++)
3852 {
3853 for (TABLE_LIST *tr= tab->table->pos_in_table_list;
3854 tr->embedding;
3855 tr= tr->embedding)
3856 {
3857 if (tr->embedding->sj_on_expr)
3858 {
3859 tab->emb_sj_nest= tr->embedding;
3860 break;
3861 }
3862 }
3863 }
3864 }
3865
3866
3867 /**
3868 @brief Check if semijoin's compared types allow materialization.
3869
3870 @param[inout] sj_nest Semi-join nest containing information about correlated
3871 expressions. Set nested_join->sjm.scan_allowed to TRUE if
3872 MaterializeScan strategy allowed. Set nested_join->sjm.lookup_allowed
3873 to TRUE if MaterializeLookup strategy allowed
3874
3875 @details
3876 This is a temporary fix for BUG#36752.
3877
3878 There are two subquery materialization strategies for semijoin:
3879
3880 1. Materialize and do index lookups in the materialized table. See
3881 BUG#36752 for description of restrictions we need to put on the
3882 compared expressions.
3883
3884 In addition, since indexes are not supported for BLOB columns,
3885 this strategy can not be used if any of the columns in the
3886 materialized table will be BLOB/GEOMETRY columns. (Note that
3887 also columns for non-BLOB values that may be greater in size
3888 than CONVERT_IF_BIGGER_TO_BLOB, will be represented as BLOB
3889 columns.)
3890
3891 2. Materialize and then do a full scan of the materialized table.
3892 The same criteria as for MaterializeLookup are applied, except that
3893 BLOB/GEOMETRY columns are allowed.
3894 */
3895
3896 static
semijoin_types_allow_materialization(TABLE_LIST * sj_nest)3897 void semijoin_types_allow_materialization(TABLE_LIST *sj_nest)
3898 {
3899 DBUG_ENTER("semijoin_types_allow_materialization");
3900
3901 DBUG_ASSERT(sj_nest->nested_join->sj_outer_exprs.elements ==
3902 sj_nest->nested_join->sj_inner_exprs.elements);
3903
3904 if (sj_nest->nested_join->sj_outer_exprs.elements > MAX_REF_PARTS)
3905 {
3906 sj_nest->nested_join->sjm.scan_allowed= false;
3907 sj_nest->nested_join->sjm.lookup_allowed= false;
3908 DBUG_VOID_RETURN;
3909 }
3910
3911 List_iterator<Item> it1(sj_nest->nested_join->sj_outer_exprs);
3912 List_iterator<Item> it2(sj_nest->nested_join->sj_inner_exprs);
3913
3914 sj_nest->nested_join->sjm.scan_allowed= false;
3915 sj_nest->nested_join->sjm.lookup_allowed= false;
3916
3917 bool blobs_involved= false;
3918 Item *outer, *inner;
3919 while (outer= it1++, inner= it2++)
3920 {
3921 if (!types_allow_materialization(outer, inner))
3922 DBUG_VOID_RETURN;
3923 blobs_involved|= inner->is_blob_field();
3924 }
3925 sj_nest->nested_join->sjm.scan_allowed= true;
3926 sj_nest->nested_join->sjm.lookup_allowed= !blobs_involved;
3927
3928 if (sj_nest->embedding)
3929 {
3930 DBUG_ASSERT(sj_nest->embedding->join_cond());
3931 /*
3932 There are two issues that prevent materialization strategy from being
3933 used when a semi-join nest is on the inner side of an outer join:
3934 1. If the semi-join contains dependencies to outer tables,
3935 materialize-scan strategy cannot be used.
3936 2. Make sure that executor is able to evaluate triggered conditions
3937 for semi-join materialized tables. It should be correct, but needs
3938 verification.
3939 TODO: Remove this limitation!
3940 Handle this by disabling materialization strategies:
3941 */
3942 sj_nest->nested_join->sjm.scan_allowed= false;
3943 sj_nest->nested_join->sjm.lookup_allowed= false;
3944 DBUG_VOID_RETURN;
3945 }
3946
3947 DBUG_PRINT("info",("semijoin_types_allow_materialization: ok, allowed"));
3948
3949 DBUG_VOID_RETURN;
3950 }
3951
3952
3953 /*****************************************************************************
3954 Create JOIN_TABS, make a guess about the table types,
3955 Approximate how many records will be used in each table
3956 *****************************************************************************/
3957
3958 /**
3959 @brief
3960 Returns estimated number of rows that could be fetched by given select
3961
3962 @param thd thread handle
3963 @param select select to test
3964 @param table source table
3965 @param keys allowed keys
3966 @param limit select limit
3967
3968 @notes
3969 In case of valid range, a QUICK_SELECT_I object will be constructed and
3970 saved in select->quick.
3971
3972 @return
3973 HA_POS_ERROR for derived tables/views or if an error occur.
3974 Otherwise, estimated number of rows.
3975 */
3976
get_quick_record_count(THD * thd,SQL_SELECT * select,TABLE * table,const key_map * keys,ha_rows limit)3977 static ha_rows get_quick_record_count(THD *thd, SQL_SELECT *select,
3978 TABLE *table,
3979 const key_map *keys,ha_rows limit)
3980 {
3981 DBUG_ENTER("get_quick_record_count");
3982 uchar buff[STACK_BUFF_ALLOC];
3983 if (check_stack_overrun(thd, STACK_MIN_SIZE, buff))
3984 DBUG_RETURN(0); // Fatal error flag is set
3985
3986 DBUG_ASSERT(select);
3987
3988 TABLE_LIST *const tl= table->pos_in_table_list;
3989
3990 // Derived tables aren't filled yet, so no stats are available.
3991 if (!tl->uses_materialization())
3992 {
3993 select->head=table;
3994 int error= select->test_quick_select(thd,
3995 *keys,
3996 0, //empty table_map
3997 limit,
3998 false, //don't force quick range
3999 ORDER::ORDER_NOT_RELEVANT);
4000 if (error == 1)
4001 DBUG_RETURN(select->quick->records);
4002 if (error == -1)
4003 {
4004 table->reginfo.impossible_range=1;
4005 DBUG_RETURN(0);
4006 }
4007 DBUG_PRINT("warning",("Couldn't use record count on const keypart"));
4008 }
4009 else if (tl->materializable_is_const())
4010 {
4011 DBUG_RETURN(tl->get_unit()->get_result()->estimated_rowcount);
4012 }
4013 DBUG_RETURN(HA_POS_ERROR);
4014 }
4015
4016 /*
4017 Get estimated record length for semi-join materialization temptable
4018
4019 SYNOPSIS
4020 get_tmp_table_rec_length()
4021 items IN subquery's select list.
4022
4023 DESCRIPTION
4024 Calculate estimated record length for semi-join materialization
4025 temptable. It's an estimate because we don't follow every bit of
4026 create_tmp_table()'s logic. This isn't necessary as the return value of
4027 this function is used only for cost calculations.
4028
4029 RETURN
4030 Length of the temptable record, in bytes
4031 */
4032
get_tmp_table_rec_length(List<Item> & items)4033 static uint get_tmp_table_rec_length(List<Item> &items)
4034 {
4035 uint len= 0;
4036 Item *item;
4037 List_iterator<Item> it(items);
4038 while ((item= it++))
4039 {
4040 switch (item->result_type()) {
4041 case REAL_RESULT:
4042 len += sizeof(double);
4043 break;
4044 case INT_RESULT:
4045 if (item->max_length >= (MY_INT32_NUM_DECIMAL_DIGITS - 1))
4046 len += 8;
4047 else
4048 len += 4;
4049 break;
4050 case STRING_RESULT:
4051 /* DATE/TIME and GEOMETRY fields have STRING_RESULT result type. */
4052 if (item->is_temporal() || item->field_type() == MYSQL_TYPE_GEOMETRY)
4053 len += 8;
4054 else
4055 len += item->max_length;
4056 break;
4057 case DECIMAL_RESULT:
4058 len += 10;
4059 break;
4060 case ROW_RESULT:
4061 default:
4062 DBUG_ASSERT(0); /* purecov: deadcode */
4063 break;
4064 }
4065 }
4066 return len;
4067 }
4068
4069
4070 /**
4071 Writes to the optimizer trace information about dependencies between
4072 tables.
4073 @param trace optimizer trace
4074 @param join_tabs all JOIN_TABs of the join
4075 @param table_count how many JOIN_TABs in the 'join_tabs' array
4076 */
trace_table_dependencies(Opt_trace_context * trace,JOIN_TAB * join_tabs,uint table_count)4077 static void trace_table_dependencies(Opt_trace_context * trace,
4078 JOIN_TAB *join_tabs,
4079 uint table_count)
4080 {
4081 Opt_trace_object trace_wrapper(trace);
4082 Opt_trace_array trace_dep(trace, "table_dependencies");
4083 for (uint i= 0 ; i < table_count ; i++)
4084 {
4085 const TABLE *table= join_tabs[i].table;
4086 Opt_trace_object trace_one_table(trace);
4087 trace_one_table.add_utf8_table(table).
4088 add("row_may_be_null", table->maybe_null != 0);
4089 DBUG_ASSERT(table->map < (1ULL << table_count));
4090 for (uint j= 0; j < table_count; j++)
4091 {
4092 if (table->map & (1ULL << j))
4093 {
4094 trace_one_table.add("map_bit", j);
4095 break;
4096 }
4097 }
4098 Opt_trace_array depends_on(trace, "depends_on_map_bits");
4099 // RAND_TABLE_BIT may be in join_tabs[i].dependent, so we test all 64 bits
4100 compile_time_assert(sizeof(table->map) <= 64);
4101 for (uint j= 0; j < 64; j++)
4102 {
4103 if (join_tabs[i].dependent & (1ULL << j))
4104 depends_on.add(j);
4105 }
4106 }
4107 }
4108
4109
4110 /**
4111 Add to join_tab[i]->condition() "table.field IS NOT NULL" conditions
4112 we've inferred from ref/eq_ref access performed.
4113
4114 This function is a part of "Early NULL-values filtering for ref access"
4115 optimization.
4116
4117 Example of this optimization:
4118 For query SELECT * FROM t1,t2 WHERE t2.key=t1.field @n
4119 and plan " any-access(t1), ref(t2.key=t1.field) " @n
4120 add "t1.field IS NOT NULL" to t1's table condition. @n
4121
4122 Description of the optimization:
4123
  We look through equalities chosen to perform ref/eq_ref access,
  pick equalities that have form "tbl.part_of_key = othertbl.field"
  (where othertbl is a non-const table and othertbl.field may be NULL)
  and add them to conditions on corresponding tables (othertbl in this
  example).
4129
4130 Exception from that is the case when referred_tab->join != join.
4131 I.e. don't add NOT NULL constraints from any embedded subquery.
4132 Consider this query:
4133 @code
4134 SELECT A.f2 FROM t1 LEFT JOIN t2 A ON A.f2 = f1
4135 WHERE A.f3=(SELECT MIN(f3) FROM t2 C WHERE A.f4 = C.f4) OR A.f3 IS NULL;
4136 @endcode
4137 Here condition A.f3 IS NOT NULL is going to be added to the WHERE
4138 condition of the embedding query.
4139 Another example:
4140 SELECT * FROM t10, t11 WHERE (t10.a < 10 OR t10.a IS NULL)
4141 AND t11.b <=> t10.b AND (t11.a = (SELECT MAX(a) FROM t12
4142 WHERE t12.b = t10.a ));
4143 Here condition t10.a IS NOT NULL is going to be added.
4144 In both cases addition of NOT NULL condition will erroneously reject
4145 some rows of the result set.
4146 referred_tab->join != join constraint would disallow such additions.
4147
4148 This optimization doesn't affect the choices that ref, range, or join
4149 optimizer make. This was intentional because this was added after 4.1
4150 was GA.
4151
4152 Implementation overview
  1. update_ref_and_keys() accumulates info about null-rejecting
  predicates in Key_field::null_rejecting
  1.1 add_key_part saves these to Key_use.
  2. create_ref_for_key copies them to TABLE_REF.
  3. add_not_null_conds adds "x IS NOT NULL" to join_tab->m_condition of
  appropriate JOIN_TAB members.
4159 */
4160
add_not_null_conds(JOIN * join)4161 static void add_not_null_conds(JOIN *join)
4162 {
4163 DBUG_ENTER("add_not_null_conds");
4164 for (uint i=join->const_tables ; i < join->tables ; i++)
4165 {
4166 JOIN_TAB *tab=join->join_tab+i;
4167 if ((tab->type == JT_REF || tab->type == JT_EQ_REF ||
4168 tab->type == JT_REF_OR_NULL) &&
4169 !tab->table->maybe_null)
4170 {
4171 for (uint keypart= 0; keypart < tab->ref.key_parts; keypart++)
4172 {
4173 if (tab->ref.null_rejecting & ((key_part_map)1 << keypart))
4174 {
4175 Item *item= tab->ref.items[keypart];
4176 Item *notnull;
4177 Item *real= item->real_item();
4178 DBUG_ASSERT(real->type() == Item::FIELD_ITEM);
4179 Item_field *not_null_item= (Item_field*)real;
4180 JOIN_TAB *referred_tab= not_null_item->field->table->reginfo.join_tab;
4181 /*
4182 For UPDATE queries such as:
4183 UPDATE t1 SET t1.f2=(SELECT MAX(t2.f4) FROM t2 WHERE t2.f3=t1.f1);
4184 not_null_item is the t1.f1, but it's referred_tab is 0.
4185 */
4186 if (!referred_tab || referred_tab->join != join)
4187 continue;
4188 if (!(notnull= new Item_func_isnotnull(not_null_item)))
4189 DBUG_VOID_RETURN;
4190 /*
4191 We need to do full fix_fields() call here in order to have correct
4192 notnull->const_item(). This is needed e.g. by test_quick_select
4193 when it is called from make_join_select after this function is
4194 called.
4195 */
4196 if (notnull->fix_fields(join->thd, ¬null))
4197 DBUG_VOID_RETURN;
4198 DBUG_EXECUTE("where",print_where(notnull,
4199 referred_tab->table->alias,
4200 QT_ORDINARY););
4201 referred_tab->and_with_condition(notnull, __LINE__);
4202 }
4203 }
4204 }
4205 }
4206 DBUG_VOID_RETURN;
4207 }
4208
4209
4210 /**
4211 Check if given expression only uses fields covered by index #keyno in the
4212 table tbl. The expression can use any fields in any other tables.
4213
4214 The expression is guaranteed not to be AND or OR - those constructs are
4215 handled outside of this function.
4216
4217 Restrict some function types from being pushed down to storage engine:
4218 a) Don't push down the triggered conditions. Nested outer joins execution
4219 code may need to evaluate a condition several times (both triggered and
4220 untriggered).
4221 b) Stored functions contain a statement that might start new operations (like
4222 DML statements) from within the storage engine. This does not work against
4223 all SEs.
4224 c) Subqueries might contain nested subqueries and involve more tables.
4225
4226 @param item Expression to check
4227 @param tbl The table having the index
4228 @param keyno The index number
4229 @param other_tbls_ok TRUE <=> Fields of other non-const tables are allowed
4230
4231 @return false if No, true if Yes
4232 */
4233
uses_index_fields_only(Item * item,TABLE * tbl,uint keyno,bool other_tbls_ok)4234 bool uses_index_fields_only(Item *item, TABLE *tbl, uint keyno,
4235 bool other_tbls_ok)
4236 {
4237 // Restrictions b and c.
4238 if (item->has_stored_program() || item->has_subquery())
4239 return false;
4240
4241 if (item->const_item())
4242 return true;
4243
4244 const Item::Type item_type= item->type();
4245
4246 switch (item_type) {
4247 case Item::FUNC_ITEM:
4248 {
4249 Item_func *item_func= (Item_func*)item;
4250 const Item_func::Functype func_type= item_func->functype();
4251
4252 /*
4253 Restriction a.
4254 TODO: Consider cloning the triggered condition and using the copies
4255 for:
4256 1. push the first copy down, to have most restrictive index condition
4257 possible.
4258 2. Put the second copy into tab->m_condition.
4259 */
4260 if (func_type == Item_func::TRIG_COND_FUNC)
4261 return false;
4262
4263 /* This is a function, apply condition recursively to arguments */
4264 if (item_func->argument_count() > 0)
4265 {
4266 Item **item_end= (item_func->arguments()) + item_func->argument_count();
4267 for (Item **child= item_func->arguments(); child != item_end; child++)
4268 {
4269 if (!uses_index_fields_only(*child, tbl, keyno, other_tbls_ok))
4270 return FALSE;
4271 }
4272 }
4273 return TRUE;
4274 }
4275 case Item::COND_ITEM:
4276 {
4277 /*
4278 This is a AND/OR condition. Regular AND/OR clauses are handled by
4279 make_cond_for_index() which will chop off the part that can be
4280 checked with index. This code is for handling non-top-level AND/ORs,
4281 e.g. func(x AND y).
4282 */
4283 List_iterator<Item> li(*((Item_cond*)item)->argument_list());
4284 Item *item;
4285 while ((item=li++))
4286 {
4287 if (!uses_index_fields_only(item, tbl, keyno, other_tbls_ok))
4288 return FALSE;
4289 }
4290 return TRUE;
4291 }
4292 case Item::FIELD_ITEM:
4293 {
4294 Item_field *item_field= (Item_field*)item;
4295 if (item_field->field->table != tbl)
4296 return other_tbls_ok;
4297 /*
4298 The below is probably a repetition - the first part checks the
4299 other two, but let's play it safe:
4300 */
4301 return item_field->field->part_of_key.is_set(keyno) &&
4302 item_field->field->type() != MYSQL_TYPE_GEOMETRY &&
4303 item_field->field->type() != MYSQL_TYPE_BLOB;
4304 }
4305 case Item::REF_ITEM:
4306 return uses_index_fields_only(item->real_item(), tbl, keyno,
4307 other_tbls_ok);
4308 default:
4309 return FALSE; /* Play it safe, don't push unknown non-const items */
4310 }
4311 }
4312
4313
4314 /**
4315 Optimize semi-join nests that could be run with sj-materialization
4316
4317 @param join The join to optimize semi-join nests for
4318
4319 @details
4320 Optimize each of the semi-join nests that can be run with
4321 materialization. For each of the nests, we
4322 - Generate the best join order for this "sub-join" and remember it;
4323 - Remember the sub-join execution cost (it's part of materialization
4324 cost);
4325 - Calculate other costs that will be incurred if we decide
4326 to use materialization strategy for this semi-join nest.
4327
4328 All obtained information is saved and will be used by the main join
4329 optimization pass.
4330
4331 @return false if successful, true if error
4332 */
4333
optimize_semijoin_nests_for_materialization(JOIN * join)4334 static bool optimize_semijoin_nests_for_materialization(JOIN *join)
4335 {
4336 DBUG_ENTER("optimize_semijoin_nests_for_materialization");
4337 List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests);
4338 TABLE_LIST *sj_nest;
4339 Opt_trace_context * const trace= &join->thd->opt_trace;
4340
4341 while ((sj_nest= sj_list_it++))
4342 {
4343 /* As a precaution, reset pointers that were used in prior execution */
4344 sj_nest->nested_join->sjm.positions= NULL;
4345
4346 /* Calculate the cost of materialization if materialization is allowed. */
4347 if (join->thd->optimizer_switch_flag(OPTIMIZER_SWITCH_SEMIJOIN) &&
4348 join->thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MATERIALIZATION))
4349 {
4350 /* A semi-join nest should not contain tables marked as const */
4351 DBUG_ASSERT(!(sj_nest->sj_inner_tables & join->const_table_map));
4352
4353 Opt_trace_object trace_wrapper(trace);
4354 Opt_trace_object
4355 trace_sjmat(trace, "execution_plan_for_potential_materialization");
4356 Opt_trace_array trace_sjmat_steps(trace, "steps");
4357 /*
4358 Try semijoin materialization if the semijoin is classified as
4359 non-trivially-correlated.
4360 */
4361 if (sj_nest->nested_join->sj_corr_tables)
4362 continue;
4363 /*
4364 Check whether data types allow execution with materialization.
4365 */
4366 semijoin_types_allow_materialization(sj_nest);
4367
4368 if (!sj_nest->nested_join->sjm.scan_allowed &&
4369 !sj_nest->nested_join->sjm.lookup_allowed)
4370 continue;
4371
4372 if (Optimize_table_order(join->thd, join, sj_nest).choose_table_order())
4373 DBUG_RETURN(true);
4374 const uint n_tables= my_count_bits(sj_nest->sj_inner_tables);
4375 calculate_materialization_costs(join, sj_nest, n_tables,
4376 &sj_nest->nested_join->sjm);
4377 /*
4378 Cost data is in sj_nest->nested_join->sjm. We also need to save the
4379 plan:
4380 */
4381 if (!(sj_nest->nested_join->sjm.positions=
4382 (st_position*)join->thd->alloc(sizeof(st_position)*n_tables)))
4383 DBUG_RETURN(true);
4384 memcpy(sj_nest->nested_join->sjm.positions,
4385 join->best_positions + join->const_tables,
4386 sizeof(st_position) * n_tables);
4387 }
4388 }
4389 DBUG_RETURN(false);
4390 }
4391
4392
4393 /*
4394 Check if table's Key_use elements have an eq_ref(outer_tables) candidate
4395
4396 SYNOPSIS
4397 find_eq_ref_candidate()
4398 table Table to be checked
4399 sj_inner_tables Bitmap of inner tables. eq_ref(inner_table) doesn't
4400 count.
4401
4402 DESCRIPTION
4403 Check if table's Key_use elements have an eq_ref(outer_tables) candidate
4404
4405 TODO
4406 Check again if it is feasible to factor common parts with constant table
4407 search
4408
4409 RETURN
4410 TRUE - There exists an eq_ref(outer-tables) candidate
4411 FALSE - Otherwise
4412 */
4413
find_eq_ref_candidate(TABLE * table,table_map sj_inner_tables)4414 static bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables)
4415 {
4416 Key_use *keyuse= table->reginfo.join_tab->keyuse;
4417 uint key;
4418
4419 if (keyuse)
4420 {
4421 while (1) /* For each key */
4422 {
4423 key= keyuse->key;
4424 KEY *keyinfo= table->key_info + key;
4425 key_part_map bound_parts= 0;
4426 if ((keyinfo->flags & (HA_NOSAME)) == HA_NOSAME)
4427 {
4428 do /* For all equalities on all key parts */
4429 {
4430 /* Check if this is "t.keypart = expr(outer_tables) */
4431 if (!(keyuse->used_tables & sj_inner_tables) &&
4432 !(keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL))
4433 {
4434 /*
4435 Consider only if the resulting condition does not pass a NULL
4436 value through. Especially needed for a UNIQUE index on NULLable
4437 columns where a duplicate row is possible with NULL values.
4438 */
4439 if (keyuse->null_rejecting || !keyuse->val->maybe_null ||
4440 !keyinfo->key_part[keyuse->keypart].field->maybe_null())
4441 bound_parts|= (key_part_map)1 << keyuse->keypart;
4442 }
4443 keyuse++;
4444 } while (keyuse->key == key && keyuse->table == table);
4445
4446 if (bound_parts == LOWER_BITS(uint, keyinfo->user_defined_key_parts))
4447 return TRUE;
4448 if (keyuse->table != table)
4449 return FALSE;
4450 }
4451 else
4452 {
4453 do
4454 {
4455 keyuse++;
4456 if (keyuse->table != table)
4457 return FALSE;
4458 }
4459 while (keyuse->key == key);
4460 }
4461 }
4462 }
4463 return FALSE;
4464 }
4465
4466
4467 /**
4468 Pull tables out of semi-join nests based on functional dependencies
4469
4470 @param join The join where to do the semi-join table pullout
4471
4472 @return False if successful, true if error (Out of memory)
4473
4474 @details
4475 Pull tables out of semi-join nests based on functional dependencies,
4476 ie. if a table is accessed via eq_ref(outer_tables).
4477 The function may be called several times, the caller is responsible
4478 for setting up proper key information that this function acts upon.
4479
4480 PRECONDITIONS
4481 When this function is called, the join may have several semi-join nests
4482 but it is guaranteed that one semi-join nest does not contain another.
4483 For functionally dependent tables to be pulled out, key information must
4484 have been calculated (see update_ref_and_keys()).
4485
4486 POSTCONDITIONS
4487 * Tables that were pulled out are removed from the semi-join nest they
4488 belonged to and added to the parent join nest.
4489 * For these tables, the used_tables and not_null_tables fields of
4490 the semi-join nest they belonged to will be adjusted.
4491 The semi-join nest is also marked as correlated, and
4492 sj_corr_tables and sj_depends_on are adjusted if necessary.
4493 * Semi-join nests' sj_inner_tables is set equal to used_tables
4494
4495 NOTE
4496 Table pullout may make uncorrelated subquery correlated. Consider this
4497 example:
4498
4499 ... WHERE oe IN (SELECT it1.primary_key WHERE p(it1, it2) ... )
4500
4501 here table it1 can be pulled out (we have it1.primary_key=oe which gives
4502 us functional dependency). Once it1 is pulled out, all references to it1
4503 from p(it1, it2) become references to outside of the subquery and thus
4504 make the subquery (i.e. its semi-join nest) correlated.
4505 Making the subquery (i.e. its semi-join nest) correlated prevents us from
4506 using Materialization or LooseScan to execute it.
4507 */
4508
static bool pull_out_semijoin_tables(JOIN *join)
{
  /*
    For each semi-join nest of the join: find the tables that
    find_eq_ref_candidate() accepts, take them out of the nest's table
    bitmaps, move their TABLE_LIST elements to the enclosing join list and
    drop the nest if it became empty.
    Returns TRUE only if appending to the enclosing join list fails.
  */
  TABLE_LIST *sj_nest;
  DBUG_ENTER("pull_out_semijoin_tables");

  DBUG_ASSERT(!join->select_lex->sj_nests.is_empty());

  List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests);
  Opt_trace_context * const trace= &join->thd->opt_trace;
  Opt_trace_object trace_wrapper(trace);
  Opt_trace_array trace_pullout(trace, "pulled_out_semijoin_tables");

  /* Try pulling out tables from each semi-join nest */
  while ((sj_nest= sj_list_it++))
  {
    /* Bitmap of the tables chosen for pullout from this nest so far */
    table_map pulled_tables= 0;
    List_iterator<TABLE_LIST> child_li(sj_nest->nested_join->join_list);
    TABLE_LIST *tbl;
    /*
      Calculate set of tables within this semi-join nest that have
      other dependent tables
    */
    table_map dep_tables= 0;
    while ((tbl= child_li++))
    {
      /* Children without a TABLE object are ignored */
      TABLE *const table= tbl->table;
      if (table &&
          (table->reginfo.join_tab->dependent &
           sj_nest->nested_join->used_tables))
        dep_tables|= table->reginfo.join_tab->dependent;
    }
    /*
      Find which tables we can pull out based on key dependency data.
      Note that pulling one table out can allow us to pull out some
      other tables too.
    */
    bool pulled_a_table;
    do
    {
      pulled_a_table= FALSE;
      child_li.rewind();
      while ((tbl= child_li++))
      {
        /* Consider only tables not yet pulled and not in dep_tables */
        if (tbl->table &&
            !(pulled_tables & tbl->table->map) &&
            !(dep_tables & tbl->table->map))
        {
          /*
            Tables already pulled out no longer count as inner tables,
            so they are masked out of the bitmap passed to the check.
          */
          if (find_eq_ref_candidate(tbl->table,
                                    sj_nest->nested_join->used_tables &
                                    ~pulled_tables))
          {
            pulled_a_table= TRUE;
            pulled_tables |= tbl->table->map;
            Opt_trace_object(trace).add_utf8_table(tbl->table).
              add("functionally_dependent", true);
            /*
              Pulling a table out of uncorrelated subquery in general
              makes it correlated. See the NOTE to this function.
            */
            sj_nest->nested_join->sj_corr_tables|= tbl->table->map;
            sj_nest->nested_join->sj_depends_on|= tbl->table->map;
          }
        }
      }
    } while (pulled_a_table);   // Repeat until a full pass pulls nothing

    child_li.rewind();
    /*
      Move the pulled out TABLE_LIST elements to the parents.
      First remove the pulled tables from the nest's bitmaps.
    */
    sj_nest->nested_join->used_tables&= ~pulled_tables;
    sj_nest->nested_join->not_null_tables&= ~pulled_tables;

    /* sj_inner_tables is a copy of nested_join->used_tables */
    sj_nest->sj_inner_tables= sj_nest->nested_join->used_tables;

    if (pulled_tables)
    {
      /*
        Destination list: the join list of the nest embedding sj_nest, or
        the top-level join list if sj_nest is not embedded.
      */
      List<TABLE_LIST> *upper_join_list= (sj_nest->embedding != NULL) ?
          &sj_nest->embedding->nested_join->join_list :
          &join->select_lex->top_join_list;

      /*
        NOTE(review): presumably makes the list changes below use the
        prepared statement arena for the lifetime of this object - confirm
        against Prepared_stmt_arena_holder.
      */
      Prepared_stmt_arena_holder ps_arena_holder(join->thd);

      while ((tbl= child_li++))
      {
        /* A table no longer present in used_tables has been pulled out */
        if (tbl->table &&
            !(sj_nest->nested_join->used_tables & tbl->table->map))
        {
          /*
            Pull the table up in the same way as simplify_joins() does:
            update join_list and embedding pointers but keep next[_local]
            pointers.
          */
          child_li.remove();

          if (upper_join_list->push_back(tbl))
            DBUG_RETURN(TRUE);

          tbl->join_list= upper_join_list;
          tbl->embedding= sj_nest->embedding;
        }
      }

      /* Remove the sj-nest itself if we've removed everything from it */
      if (!sj_nest->nested_join->used_tables)
      {
        List_iterator<TABLE_LIST> li(*upper_join_list);
        /*
          Find the sj_nest in the list.
          The loop has no other exit, so sj_nest is assumed to be an
          element of upper_join_list.
        */
        while (sj_nest != li++)
        {}
        li.remove();
        /* Also remove it from the list of SJ-nests: */
        sj_list_it.remove();
      }
    }
  }
  DBUG_RETURN(FALSE);
}
4628
4629
/*****************************************************************************
  Check which keys are used and which tables reference which tables.
  Updates in stat:
	  keys	     Bitmap of all used keys
	  const_keys Bitmap of all keys which may be used with quick_select
	  keyuse     Pointer to possible keys
*****************************************************************************/
4637
4638 /// Used when finding key fields
4639 struct Key_field {
Key_fieldKey_field4640 Key_field(Field *field, Item *val, uint level, uint optimize, bool eq_func,
4641 bool null_rejecting, bool *cond_guard, uint sj_pred_no)
4642 : field(field), val(val), level(level), optimize(optimize), eq_func(eq_func),
4643 null_rejecting(null_rejecting), cond_guard(cond_guard),
4644 sj_pred_no(sj_pred_no)
4645 {}
4646 Field *field;
4647 Item *val; ///< May be empty if diff constant
4648 uint level;
4649 uint optimize; // KEY_OPTIMIZE_*
4650 bool eq_func;
4651 /**
4652 If true, the condition this struct represents will not be satisfied
4653 when val IS NULL.
4654 @sa Key_use::null_rejecting .
4655 */
4656 bool null_rejecting;
4657 bool *cond_guard; ///< @sa Key_use::cond_guard
4658 uint sj_pred_no; ///< @sa Key_use::sj_pred_no
4659 };
4660
/*
  Values in optimize.
  These are distinct bit values for the 'optimize' member (see
  Key_field::optimize); code in this file tests them with a bitwise AND
  against a key use's 'optimize' member.
*/
#define KEY_OPTIMIZE_EXISTS		1
#define KEY_OPTIMIZE_REF_OR_NULL	2
4664
4665 /**
4666 Merge new key definitions to old ones, remove those not used in both.
4667
4668 This is called for OR between different levels.
4669
4670 To be able to do 'ref_or_null' we merge a comparison of a column
4671 and 'column IS NULL' to one test. This is useful for sub select queries
4672 that are internally transformed to something like:.
4673
4674 @code
4675 SELECT * FROM t1 WHERE t1.key=outer_ref_field or t1.key IS NULL
4676 @endcode
4677
4678 Key_field::null_rejecting is processed as follows: @n
4679 result has null_rejecting=true if it is set for both ORed references.
4680 for example:
4681 - (t2.key = t1.field OR t2.key = t1.field) -> null_rejecting=true
4682 - (t2.key = t1.field OR t2.key <=> t1.field) -> null_rejecting=false
4683
4684 @todo
4685 The result of this is that we're missing some 'ref' accesses.
4686 OptimizerTeam: Fix this
4687 */
4688
4689 static Key_field *
merge_key_fields(Key_field * start,Key_field * new_fields,Key_field * end,uint and_level)4690 merge_key_fields(Key_field *start, Key_field *new_fields, Key_field *end,
4691 uint and_level)
4692 {
4693 if (start == new_fields)
4694 return start; // Impossible or
4695 if (new_fields == end)
4696 return start; // No new fields, skip all
4697
4698 Key_field *first_free=new_fields;
4699
4700 /* Mark all found fields in old array */
4701 for (; new_fields != end ; new_fields++)
4702 {
4703 for (Key_field *old=start ; old != first_free ; old++)
4704 {
4705 if (old->field == new_fields->field)
4706 {
4707 /*
4708 NOTE: below const_item() call really works as "!used_tables()", i.e.
4709 it can return FALSE where it is feasible to make it return TRUE.
4710
4711 The cause is as follows: Some of the tables are already known to be
4712 const tables (the detection code is in make_join_statistics(),
4713 above the update_ref_and_keys() call), but we didn't propagate
4714 information about this: TABLE::const_table is not set to TRUE, and
4715 Item::update_used_tables() hasn't been called for each item.
4716 The result of this is that we're missing some 'ref' accesses.
4717 TODO: OptimizerTeam: Fix this
4718 */
4719 if (!new_fields->val->const_item())
4720 {
4721 /*
4722 If the value matches, we can use the key reference.
4723 If not, we keep it until we have examined all new values
4724 */
4725 if (old->val->eq(new_fields->val, old->field->binary()))
4726 {
4727 old->level= and_level;
4728 old->optimize= ((old->optimize & new_fields->optimize &
4729 KEY_OPTIMIZE_EXISTS) |
4730 ((old->optimize | new_fields->optimize) &
4731 KEY_OPTIMIZE_REF_OR_NULL));
4732 old->null_rejecting= (old->null_rejecting &&
4733 new_fields->null_rejecting);
4734 }
4735 }
4736 else if (old->eq_func && new_fields->eq_func &&
4737 old->val->eq_by_collation(new_fields->val,
4738 old->field->binary(),
4739 old->field->charset()))
4740
4741 {
4742 old->level= and_level;
4743 old->optimize= ((old->optimize & new_fields->optimize &
4744 KEY_OPTIMIZE_EXISTS) |
4745 ((old->optimize | new_fields->optimize) &
4746 KEY_OPTIMIZE_REF_OR_NULL));
4747 old->null_rejecting= (old->null_rejecting &&
4748 new_fields->null_rejecting);
4749 }
4750 else if (old->eq_func && new_fields->eq_func &&
4751 ((old->val->const_item() && old->val->is_null()) ||
4752 new_fields->val->is_null()))
4753 {
4754 /* field = expression OR field IS NULL */
4755 old->level= and_level;
4756 old->optimize= KEY_OPTIMIZE_REF_OR_NULL;
4757 /*
4758 Remember the NOT NULL value unless the value does not depend
4759 on other tables.
4760 */
4761 if (!old->val->used_tables() && old->val->is_null())
4762 old->val= new_fields->val;
4763 /* The referred expression can be NULL: */
4764 old->null_rejecting= 0;
4765 }
4766 else
4767 {
4768 /*
4769 We are comparing two different const. In this case we can't
4770 use a key-lookup on this so it's better to remove the value
4771 and let the range optimzier handle it
4772 */
4773 if (old == --first_free) // If last item
4774 break;
4775 *old= *first_free; // Remove old value
4776 old--; // Retry this value
4777 }
4778 }
4779 }
4780 }
4781 /* Remove all not used items */
4782 for (Key_field *old=start ; old != first_free ;)
4783 {
4784 if (old->level != and_level)
4785 { // Not used in all levels
4786 if (old == --first_free)
4787 break;
4788 *old= *first_free; // Remove old value
4789 continue;
4790 }
4791 old++;
4792 }
4793 return first_free;
4794 }
4795
4796
4797 /**
4798 Given a field, return its index in semi-join's select list, or UINT_MAX
4799
4800 @param field Field that we are looking up table for
4801
4802 @retval =UINT_MAX Field is not from a semijoin-transformed subquery
4803 @retval <UINT_MAX Index in select list of subquery
4804
4805 @details
4806 Given a field, find its table; then see if the table is within a
4807 semi-join nest and if the field was in select list of the subquery
4808 (if subquery was part of a quantified comparison predicate), or
4809 the field was a result of subquery decorrelation.
4810 If it was, then return the field's index in the select list.
4811 The value is used by LooseScan strategy.
4812 */
4813
get_semi_join_select_list_index(Field * field)4814 static uint get_semi_join_select_list_index(Field *field)
4815 {
4816 TABLE_LIST *emb_sj_nest= field->table->pos_in_table_list->embedding;
4817 if (emb_sj_nest && emb_sj_nest->sj_on_expr)
4818 {
4819 List<Item> &items= emb_sj_nest->nested_join->sj_inner_exprs;
4820 List_iterator<Item> it(items);
4821 for (uint i= 0; i < items.elements; i++)
4822 {
4823 Item *sel_item= it++;
4824 if (sel_item->type() == Item::FIELD_ITEM &&
4825 ((Item_field*)sel_item)->field->eq(field))
4826 return i;
4827 }
4828 }
4829 return UINT_MAX;
4830 }
4831
4832 /**
4833 @brief
4834 If EXPLAIN EXTENDED, add warning that an index cannot be used for
4835 ref access
4836
4837 @details
4838 If EXPLAIN EXTENDED, add a warning for each index that cannot be
4839 used for ref access due to either type conversion or different
4840 collations on the field used for comparison
4841
4842 Example type conversion (char compared to int):
4843
4844 CREATE TABLE t1 (url char(1) PRIMARY KEY);
4845 SELECT * FROM t1 WHERE url=1;
4846
4847 Example different collations (danish vs german2):
4848
4849 CREATE TABLE t1 (url char(1) PRIMARY KEY) collate latin1_danish_ci;
4850 SELECT * FROM t1 WHERE url='1' collate latin1_german2_ci;
4851
4852 @param thd Thread for the connection that submitted the query
4853 @param field Field used in comparision
4854 @param cant_use_indexes Indexes that cannot be used for lookup
4855 */
4856 static void
warn_index_not_applicable(THD * thd,const Field * field,const key_map cant_use_index)4857 warn_index_not_applicable(THD *thd, const Field *field,
4858 const key_map cant_use_index)
4859 {
4860 if (thd->lex->describe & DESCRIBE_EXTENDED)
4861 for (uint j=0 ; j < field->table->s->keys ; j++)
4862 if (cant_use_index.is_set(j))
4863 push_warning_printf(thd,
4864 Sql_condition::WARN_LEVEL_WARN,
4865 ER_WARN_INDEX_NOT_APPLICABLE,
4866 ER(ER_WARN_INDEX_NOT_APPLICABLE),
4867 "ref",
4868 field->table->key_info[j].name,
4869 field->field_name);
4870 }
4871
4872 /**
4873 Add a possible key to array of possible keys if it's usable as a key
4874
4875 @param key_fields Pointer to add key, if usable
4876 @param and_level And level, to be stored in Key_field
4877 @param cond Condition predicate
4878 @param field Field used in comparision
4879 @param eq_func True if we used =, <=> or IS NULL
4880 @param value Array of values used for comparison with field
4881 @param num_values Number of elements in the array of values
4882 @param usable_tables Tables which can be used for key optimization
4883 @param sargables IN/OUT Array of found sargable candidates
4884
4885 @note
4886 If we are doing a NOT NULL comparison on a NOT NULL field in a outer join
4887 table, we store this to be able to do not exists optimization later.
4888
4889 @returns
4890 *key_fields is incremented if we stored a key in the array
4891 */
4892
4893 static void
add_key_field(Key_field ** key_fields,uint and_level,Item_func * cond,Field * field,bool eq_func,Item ** value,uint num_values,table_map usable_tables,SARGABLE_PARAM ** sargables)4894 add_key_field(Key_field **key_fields,uint and_level, Item_func *cond,
4895 Field *field, bool eq_func, Item **value, uint num_values,
4896 table_map usable_tables, SARGABLE_PARAM **sargables)
4897 {
4898 DBUG_PRINT("info",("add_key_field for field %s",field->field_name));
4899 uint exists_optimize= 0;
4900 TABLE_LIST *table= field->table->pos_in_table_list;
4901
4902 if (field->table->reginfo.join_tab == NULL)
4903 {
4904 /*
4905 Due to a bug in IN-to-EXISTS (grep for real_item() in item_subselect.cc
4906 for more info), an index over a field from an outer query might be
4907 considered here, which is incorrect. Their query has been fully
4908 optimized already so their reginfo.join_tab is NULL and we reject them.
4909 */
4910 return;
4911 }
4912
4913 if (!table->derived_keys_ready && table->uses_materialization() &&
4914 !field->table->is_created() &&
4915 table->update_derived_keys(field, value, num_values))
4916 return;
4917 if (!(field->flags & PART_KEY_FLAG))
4918 {
4919 // Don't remove column IS NULL on a LEFT JOIN table
4920 if (!eq_func || (*value)->type() != Item::NULL_ITEM ||
4921 !field->table->maybe_null || field->real_maybe_null())
4922 return; // Not a key. Skip it
4923 exists_optimize= KEY_OPTIMIZE_EXISTS;
4924 DBUG_ASSERT(num_values == 1);
4925 }
4926 else
4927 {
4928 table_map used_tables=0;
4929 bool optimizable=0;
4930 for (uint i=0; i<num_values; i++)
4931 {
4932 used_tables|=(value[i])->used_tables();
4933 if (!((value[i])->used_tables() & (field->table->map | RAND_TABLE_BIT)))
4934 optimizable=1;
4935 }
4936 if (!optimizable)
4937 return;
4938 if (!(usable_tables & field->table->map))
4939 {
4940 if (!eq_func || (*value)->type() != Item::NULL_ITEM ||
4941 !field->table->maybe_null || field->real_maybe_null())
4942 return; // Can't use left join optimize
4943 exists_optimize= KEY_OPTIMIZE_EXISTS;
4944 }
4945 else
4946 {
4947 JOIN_TAB *stat=field->table->reginfo.join_tab;
4948 key_map possible_keys=field->key_start;
4949 possible_keys.intersect(field->table->keys_in_use_for_query);
4950 stat[0].keys.merge(possible_keys); // Add possible keys
4951
4952 /*
4953 Save the following cases:
4954 Field op constant
4955 Field LIKE constant where constant doesn't start with a wildcard
4956 Field = field2 where field2 is in a different table
4957 Field op formula
4958 Field IS NULL
4959 Field IS NOT NULL
4960 Field BETWEEN ...
4961 Field IN ...
4962 */
4963 stat[0].key_dependent|=used_tables;
4964
4965 bool is_const=1;
4966 for (uint i=0; i<num_values; i++)
4967 {
4968 if (!(is_const&= value[i]->const_item()))
4969 break;
4970 }
4971 if (is_const)
4972 stat[0].const_keys.merge(possible_keys);
4973 else if (!eq_func)
4974 {
4975 /*
4976 Save info to be able check whether this predicate can be
4977 considered as sargable for range analisis after reading const tables.
4978 We do not save info about equalities as update_const_equal_items
4979 will take care of updating info on keys from sargable equalities.
4980 */
4981 (*sargables)--;
4982 (*sargables)->field= field;
4983 (*sargables)->arg_value= value;
4984 (*sargables)->num_values= num_values;
4985 }
4986 /*
4987 We can't always use indexes when comparing a string index to a
4988 number. cmp_type() is checked to allow compare of dates to numbers.
4989 eq_func is NEVER true when num_values > 1
4990 */
4991 if (!eq_func)
4992 return;
4993 if (field->result_type() == STRING_RESULT)
4994 {
4995 if ((*value)->result_type() != STRING_RESULT)
4996 {
4997 if (field->cmp_type() != (*value)->result_type())
4998 {
4999 warn_index_not_applicable(stat->join->thd, field, possible_keys);
5000 return;
5001 }
5002 }
5003 else
5004 {
5005 /*
5006 Can't optimize datetime_column=indexed_varchar_column,
5007 also can't use indexes if the effective collation
5008 of the operation differ from the field collation.
5009 IndexedTimeComparedToDate: can't optimize
5010 'indexed_time = temporal_expr_with_date_part' because:
5011 - without index, a TIME column with value '48:00:00' is equal to a
5012 DATETIME column with value 'CURDATE() + 2 days'
5013 - with ref access into the TIME column, CURDATE() + 2 days becomes
5014 "00:00:00" (Field_timef::store_internal() simply extracts the time
5015 part from the datetime) which is a lookup key which does not match
5016 "48:00:00"; so ref access is not be able to give the same result
5017 as without index, so is disabled.
5018 On the other hand, we can optimize indexed_datetime = time
5019 because Field_temporal_with_date::store_time() will convert
5020 48:00:00 to CURDATE() + 2 days which is the correct lookup key.
5021 */
5022 if ((!field->is_temporal() && value[0]->is_temporal()) ||
5023 (field->cmp_type() == STRING_RESULT &&
5024 field->charset() != cond->compare_collation()) ||
5025 field_time_cmp_date(field, value[0]))
5026 {
5027 warn_index_not_applicable(stat->join->thd, field, possible_keys);
5028 return;
5029 }
5030 }
5031 }
5032 }
5033 }
5034 /*
5035 For the moment eq_func is always true. This slot is reserved for future
5036 extensions where we want to remembers other things than just eq comparisons
5037 */
5038 DBUG_ASSERT(eq_func);
5039 /*
5040 If the condition has form "tbl.keypart = othertbl.field" and
5041 othertbl.field can be NULL, there will be no matches if othertbl.field
5042 has NULL value.
5043 We use null_rejecting in add_not_null_conds() to add
5044 'othertbl.field IS NOT NULL' to tab->m_condition, if this is not an outer
5045 join. We also use it to shortcut reading "tbl" when othertbl.field is
5046 found to be a NULL value (in join_read_always_key() and BKA).
5047 */
5048 bool null_rejecting;
5049 Item *real= (*value)->real_item();
5050 if (((cond->functype() == Item_func::EQ_FUNC) ||
5051 (cond->functype() == Item_func::MULT_EQUAL_FUNC)) &&
5052 (real->type() == Item::FIELD_ITEM) &&
5053 ((Item_field*)real)->field->maybe_null())
5054 null_rejecting= true;
5055 else
5056 null_rejecting= false;
5057
5058 /* Store possible eq field */
5059 new (*key_fields)
5060 Key_field(field, *value, and_level, exists_optimize, eq_func,
5061 null_rejecting, NULL, get_semi_join_select_list_index(field));
5062 (*key_fields)++;
5063 }
5064
5065 /**
5066 Add possible keys to array of possible keys originated from a simple
5067 predicate.
5068
5069 @param key_fields Pointer to add key, if usable
5070 @param and_level And level, to be stored in Key_field
5071 @param cond Condition predicate
5072 @param field Field used in comparision
5073 @param eq_func True if we used =, <=> or IS NULL
5074 @param value Value used for comparison with field
5075 Is NULL for BETWEEN and IN
5076 @param usable_tables Tables which can be used for key optimization
5077 @param sargables IN/OUT Array of found sargable candidates
5078
5079 @note
5080 If field items f1 and f2 belong to the same multiple equality and
5081 a key is added for f1, the the same key is added for f2.
5082
5083 @returns
5084 *key_fields is incremented if we stored a key in the array
5085 */
5086
5087 static void
add_key_equal_fields(Key_field ** key_fields,uint and_level,Item_func * cond,Item_field * field_item,bool eq_func,Item ** val,uint num_values,table_map usable_tables,SARGABLE_PARAM ** sargables)5088 add_key_equal_fields(Key_field **key_fields, uint and_level,
5089 Item_func *cond, Item_field *field_item,
5090 bool eq_func, Item **val,
5091 uint num_values, table_map usable_tables,
5092 SARGABLE_PARAM **sargables)
5093 {
5094 Field *field= field_item->field;
5095 add_key_field(key_fields, and_level, cond, field,
5096 eq_func, val, num_values, usable_tables, sargables);
5097 Item_equal *item_equal= field_item->item_equal;
5098 if (item_equal)
5099 {
5100 /*
5101 Add to the set of possible key values every substitution of
5102 the field for an equal field included into item_equal
5103 */
5104 Item_equal_iterator it(*item_equal);
5105 Item_field *item;
5106 while ((item= it++))
5107 {
5108 if (!field->eq(item->field))
5109 {
5110 add_key_field(key_fields, and_level, cond, item->field,
5111 eq_func, val, num_values, usable_tables,
5112 sargables);
5113 }
5114 }
5115 }
5116 }
5117
5118
5119 /**
5120 Check if an expression is a non-outer field.
5121
5122 Checks if an expression is a field and belongs to the current select.
5123
5124 @param field Item expression to check
5125
5126 @return boolean
5127 @retval TRUE the expression is a local field
5128 @retval FALSE it's something else
5129 */
5130
5131 static bool
is_local_field(Item * field)5132 is_local_field (Item *field)
5133 {
5134 return field->real_item()->type() == Item::FIELD_ITEM
5135 && !(field->used_tables() & OUTER_REF_TABLE_BIT)
5136 && !((Item_field *)field->real_item())->depended_from;
5137 }
5138
5139
5140 static void
add_key_fields(JOIN * join,Key_field ** key_fields,uint * and_level,Item * cond,table_map usable_tables,SARGABLE_PARAM ** sargables)5141 add_key_fields(JOIN *join, Key_field **key_fields, uint *and_level,
5142 Item *cond, table_map usable_tables,
5143 SARGABLE_PARAM **sargables)
5144 {
5145 if (cond->type() == Item_func::COND_ITEM)
5146 {
5147 List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
5148 Key_field *org_key_fields= *key_fields;
5149
5150 if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
5151 {
5152 Item *item;
5153 while ((item=li++))
5154 add_key_fields(join, key_fields, and_level, item, usable_tables,
5155 sargables);
5156 for (; org_key_fields != *key_fields ; org_key_fields++)
5157 org_key_fields->level= *and_level;
5158 }
5159 else
5160 {
5161 (*and_level)++;
5162 add_key_fields(join, key_fields, and_level, li++, usable_tables,
5163 sargables);
5164 Item *item;
5165 while ((item=li++))
5166 {
5167 Key_field *start_key_fields= *key_fields;
5168 (*and_level)++;
5169 add_key_fields(join, key_fields, and_level, item, usable_tables,
5170 sargables);
5171 *key_fields=merge_key_fields(org_key_fields,start_key_fields,
5172 *key_fields,++(*and_level));
5173 }
5174 }
5175 return;
5176 }
5177
5178 /*
5179 Subquery optimization: Conditions that are pushed down into subqueries
5180 are wrapped into Item_func_trig_cond. We process the wrapped condition
5181 but need to set cond_guard for Key_use elements generated from it.
5182 */
5183 {
5184 if (cond->type() == Item::FUNC_ITEM &&
5185 ((Item_func*)cond)->functype() == Item_func::TRIG_COND_FUNC)
5186 {
5187 Item *cond_arg= ((Item_func*)cond)->arguments()[0];
5188 if (!join->group_list && !join->order &&
5189 join->unit->item &&
5190 join->unit->item->substype() == Item_subselect::IN_SUBS &&
5191 !join->unit->is_union())
5192 {
5193 Key_field *save= *key_fields;
5194 add_key_fields(join, key_fields, and_level, cond_arg, usable_tables,
5195 sargables);
5196 // Indicate that this ref access candidate is for subquery lookup:
5197 for (; save != *key_fields; save++)
5198 save->cond_guard= ((Item_func_trig_cond*)cond)->get_trig_var();
5199 }
5200 return;
5201 }
5202 }
5203
5204 /* If item is of type 'field op field/constant' add it to key_fields */
5205 if (cond->type() != Item::FUNC_ITEM)
5206 return;
5207 Item_func *cond_func= (Item_func*) cond;
5208 switch (cond_func->select_optimize()) {
5209 case Item_func::OPTIMIZE_NONE:
5210 break;
5211 case Item_func::OPTIMIZE_KEY:
5212 {
5213 Item **values;
5214 /*
5215 Build list of possible keys for 'a BETWEEN low AND high'.
5216 It is handled similar to the equivalent condition
5217 'a >= low AND a <= high':
5218 */
5219 if (cond_func->functype() == Item_func::BETWEEN)
5220 {
5221 Item_field *field_item;
5222 bool equal_func= FALSE;
5223 uint num_values= 2;
5224 values= cond_func->arguments();
5225
5226 bool binary_cmp= (values[0]->real_item()->type() == Item::FIELD_ITEM)
5227 ? ((Item_field*)values[0]->real_item())->field->binary()
5228 : TRUE;
5229
5230 /*
5231 Additional optimization: If 'low = high':
5232 Handle as if the condition was "t.key = low".
5233 */
5234 if (!((Item_func_between*)cond_func)->negated &&
5235 values[1]->eq(values[2], binary_cmp))
5236 {
5237 equal_func= TRUE;
5238 num_values= 1;
5239 }
5240
5241 /*
5242 Append keys for 'field <cmp> value[]' if the
5243 condition is of the form::
5244 '<field> BETWEEN value[1] AND value[2]'
5245 */
5246 if (is_local_field (values[0]))
5247 {
5248 field_item= (Item_field *) (values[0]->real_item());
5249 add_key_equal_fields(key_fields, *and_level, cond_func,
5250 field_item, equal_func, &values[1],
5251 num_values, usable_tables, sargables);
5252 }
5253 /*
5254 Append keys for 'value[0] <cmp> field' if the
5255 condition is of the form:
5256 'value[0] BETWEEN field1 AND field2'
5257 */
5258 for (uint i= 1; i <= num_values; i++)
5259 {
5260 if (is_local_field (values[i]))
5261 {
5262 field_item= (Item_field *) (values[i]->real_item());
5263 add_key_equal_fields(key_fields, *and_level, cond_func,
5264 field_item, equal_func, values,
5265 1, usable_tables, sargables);
5266 }
5267 }
5268 } // if ( ... Item_func::BETWEEN)
5269
5270 // IN, NE
5271 else if (is_local_field (cond_func->key_item()) &&
5272 !(cond_func->used_tables() & OUTER_REF_TABLE_BIT))
5273 {
5274 values= cond_func->arguments()+1;
5275 if (cond_func->functype() == Item_func::NE_FUNC &&
5276 is_local_field (cond_func->arguments()[1]))
5277 values--;
5278 DBUG_ASSERT(cond_func->functype() != Item_func::IN_FUNC ||
5279 cond_func->argument_count() != 2);
5280 add_key_equal_fields(key_fields, *and_level, cond_func,
5281 (Item_field*) (cond_func->key_item()->real_item()),
5282 0, values,
5283 cond_func->argument_count()-1,
5284 usable_tables, sargables);
5285 }
5286 break;
5287 }
5288 case Item_func::OPTIMIZE_OP:
5289 {
5290 bool equal_func=(cond_func->functype() == Item_func::EQ_FUNC ||
5291 cond_func->functype() == Item_func::EQUAL_FUNC);
5292
5293 if (is_local_field (cond_func->arguments()[0]))
5294 {
5295 add_key_equal_fields(key_fields, *and_level, cond_func,
5296 (Item_field*) (cond_func->arguments()[0])->real_item(),
5297 equal_func,
5298 cond_func->arguments()+1, 1, usable_tables,
5299 sargables);
5300 }
5301 if (is_local_field (cond_func->arguments()[1]) &&
5302 cond_func->functype() != Item_func::LIKE_FUNC)
5303 {
5304 add_key_equal_fields(key_fields, *and_level, cond_func,
5305 (Item_field*) (cond_func->arguments()[1])->real_item(),
5306 equal_func,
5307 cond_func->arguments(),1,usable_tables,
5308 sargables);
5309 }
5310 break;
5311 }
5312 case Item_func::OPTIMIZE_NULL:
5313 /* column_name IS [NOT] NULL */
5314 if (is_local_field (cond_func->arguments()[0]) &&
5315 !(cond_func->used_tables() & OUTER_REF_TABLE_BIT))
5316 {
5317 Item *tmp=new Item_null;
5318 if (unlikely(!tmp)) // Should never be true
5319 return;
5320 add_key_equal_fields(key_fields, *and_level, cond_func,
5321 (Item_field*) (cond_func->arguments()[0])->real_item(),
5322 cond_func->functype() == Item_func::ISNULL_FUNC,
5323 &tmp, 1, usable_tables, sargables);
5324 }
5325 break;
5326 case Item_func::OPTIMIZE_EQUAL:
5327 Item_equal *item_equal= (Item_equal *) cond;
5328 Item *const_item= item_equal->get_const();
5329 Item_equal_iterator it(*item_equal);
5330 Item_field *item;
5331 if (const_item)
5332 {
5333 /*
5334 For each field field1 from item_equal consider the equality
5335 field1=const_item as a condition allowing an index access of the table
5336 with field1 by the keys value of field1.
5337 */
5338 while ((item= it++))
5339 {
5340 add_key_field(key_fields, *and_level, cond_func, item->field,
5341 TRUE, &const_item, 1, usable_tables, sargables);
5342 }
5343 }
5344 else
5345 {
5346 /*
5347 Consider all pairs of different fields included into item_equal.
5348 For each of them (field1, field1) consider the equality
5349 field1=field2 as a condition allowing an index access of the table
5350 with field1 by the keys value of field2.
5351 */
5352 Item_equal_iterator fi(*item_equal);
5353 while ((item= fi++))
5354 {
5355 Field *field= item->field;
5356 while ((item= it++))
5357 {
5358 if (!field->eq(item->field))
5359 {
5360 add_key_field(key_fields, *and_level, cond_func, field,
5361 TRUE, (Item **) &item, 1, usable_tables,
5362 sargables);
5363 }
5364 }
5365 it.rewind();
5366 }
5367 }
5368 break;
5369 }
5370 }
5371
5372
5373 /*
5374 Add all keys with uses 'field' for some keypart
5375 If field->and_level != and_level then only mark key_part as const_part
5376
5377 RETURN
5378 0 - OK
5379 1 - Out of memory.
5380 */
5381
5382 static bool
add_key_part(Key_use_array * keyuse_array,Key_field * key_field)5383 add_key_part(Key_use_array *keyuse_array, Key_field *key_field)
5384 {
5385 Field *field=key_field->field;
5386 TABLE *form= field->table;
5387
5388 if (key_field->eq_func && !(key_field->optimize & KEY_OPTIMIZE_EXISTS))
5389 {
5390 for (uint key=0 ; key < form->s->keys ; key++)
5391 {
5392 if (!(form->keys_in_use_for_query.is_set(key)))
5393 continue;
5394 if (form->key_info[key].flags & (HA_FULLTEXT | HA_SPATIAL))
5395 continue; // ToDo: ft-keys in non-ft queries. SerG
5396
5397 uint key_parts= actual_key_parts(&form->key_info[key]);
5398 for (uint part=0 ; part < key_parts ; part++)
5399 {
5400 if (field->eq(form->key_info[key].key_part[part].field))
5401 {
5402 const Key_use keyuse(field->table,
5403 key_field->val,
5404 key_field->val->used_tables(),
5405 key,
5406 part,
5407 key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL,
5408 (key_part_map) 1 << part,
5409 ~(ha_rows) 0, // will be set in optimize_keyuse
5410 key_field->null_rejecting,
5411 key_field->cond_guard,
5412 key_field->sj_pred_no);
5413 if (keyuse_array->push_back(keyuse))
5414 return TRUE;
5415 }
5416 }
5417 }
5418 }
5419 return FALSE;
5420 }
5421
5422
5423 static bool
add_ft_keys(Key_use_array * keyuse_array,JOIN_TAB * stat,Item * cond,table_map usable_tables)5424 add_ft_keys(Key_use_array *keyuse_array,
5425 JOIN_TAB *stat,Item *cond,table_map usable_tables)
5426 {
5427 Item_func_match *cond_func=NULL;
5428
5429 if (!cond)
5430 return FALSE;
5431
5432 if (cond->type() == Item::FUNC_ITEM)
5433 {
5434 Item_func *func=(Item_func *)cond;
5435 Item_func::Functype functype= func->functype();
5436 if (functype == Item_func::FT_FUNC)
5437 cond_func=(Item_func_match *)cond;
5438 else if (func->arg_count == 2)
5439 {
5440 Item *arg0=(Item *)(func->arguments()[0]),
5441 *arg1=(Item *)(func->arguments()[1]);
5442 if (arg1->const_item() && arg1->cols() == 1 &&
5443 arg0->type() == Item::FUNC_ITEM &&
5444 ((Item_func *) arg0)->functype() == Item_func::FT_FUNC &&
5445 ((functype == Item_func::GE_FUNC && arg1->val_real() > 0) ||
5446 (functype == Item_func::GT_FUNC && arg1->val_real() >=0)))
5447 cond_func= (Item_func_match *) arg0;
5448 else if (arg0->const_item() &&
5449 arg1->type() == Item::FUNC_ITEM &&
5450 ((Item_func *) arg1)->functype() == Item_func::FT_FUNC &&
5451 ((functype == Item_func::LE_FUNC && arg0->val_real() > 0) ||
5452 (functype == Item_func::LT_FUNC && arg0->val_real() >=0)))
5453 cond_func= (Item_func_match *) arg1;
5454 }
5455 }
5456 else if (cond->type() == Item::COND_ITEM)
5457 {
5458 List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
5459
5460 if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
5461 {
5462 Item *item;
5463 while ((item=li++))
5464 {
5465 if (add_ft_keys(keyuse_array,stat,item,usable_tables))
5466 return TRUE;
5467 }
5468 }
5469 }
5470
5471 if (!cond_func || cond_func->key == NO_SUCH_KEY ||
5472 !(usable_tables & cond_func->table->map))
5473 return FALSE;
5474
5475 const Key_use keyuse(cond_func->table,
5476 cond_func,
5477 cond_func->key_item()->used_tables(),
5478 cond_func->key,
5479 FT_KEYPART,
5480 0, // optimize
5481 0, // keypart_map
5482 ~(ha_rows)0, // ref_table_rows
5483 false, // null_rejecting
5484 NULL, // cond_guard
5485 UINT_MAX); // sj_pred_no
5486 return keyuse_array->push_back(keyuse);
5487 }
5488
5489
sort_keyuse(Key_use * a,Key_use * b)5490 static int sort_keyuse(Key_use *a, Key_use *b)
5491 {
5492 int res;
5493 if (a->table->tablenr != b->table->tablenr)
5494 return (int) (a->table->tablenr - b->table->tablenr);
5495 if (a->key != b->key)
5496 return (int) (a->key - b->key);
5497 if (a->keypart != b->keypart)
5498 return (int) (a->keypart - b->keypart);
5499 // Place const values before other ones
5500 if ((res= MY_TEST((a->used_tables & ~OUTER_REF_TABLE_BIT)) -
5501 MY_TEST((b->used_tables & ~OUTER_REF_TABLE_BIT))))
5502 return res;
5503 /* Place rows that are not 'OPTIMIZE_REF_OR_NULL' first */
5504 return (int) ((a->optimize & KEY_OPTIMIZE_REF_OR_NULL) -
5505 (b->optimize & KEY_OPTIMIZE_REF_OR_NULL));
5506 }
5507
5508
5509 /*
5510 Add to Key_field array all 'ref' access candidates within nested join.
5511
5512 This function populates Key_field array with entries generated from the
5513 ON condition of the given nested join, and does the same for nested joins
5514 contained within this nested join.
5515
5516 @param[in] nested_join_table Nested join pseudo-table to process
5517 @param[in,out] end End of the key field array
5518 @param[in,out] and_level And-level
5519 @param[in,out] sargables Array of found sargable candidates
5520
5521
5522 @note
5523 We can add accesses to the tables that are direct children of this nested
5524 join (1), and are not inner tables w.r.t their neighbours (2).
5525
5526 Example for #1 (outer brackets pair denotes nested join this function is
5527 invoked for):
5528 @code
5529 ... LEFT JOIN (t1 LEFT JOIN (t2 ... ) ) ON cond
5530 @endcode
5531 Example for #2:
5532 @code
5533 ... LEFT JOIN (t1 LEFT JOIN t2 ) ON cond
5534 @endcode
5535 In examples 1-2 for condition cond, we can add 'ref' access candidates to
5536 t1 only.
5537 Example #3:
5538 @code
5539 ... LEFT JOIN (t1, t2 LEFT JOIN t3 ON inner_cond) ON cond
5540 @endcode
5541 Here we can add 'ref' access candidates for t1 and t2, but not for t3.
5542 */
5543
add_key_fields_for_nj(JOIN * join,TABLE_LIST * nested_join_table,Key_field ** end,uint * and_level,SARGABLE_PARAM ** sargables)5544 static void add_key_fields_for_nj(JOIN *join, TABLE_LIST *nested_join_table,
5545 Key_field **end, uint *and_level,
5546 SARGABLE_PARAM **sargables)
5547 {
5548 List_iterator<TABLE_LIST> li(nested_join_table->nested_join->join_list);
5549 List_iterator<TABLE_LIST> li2(nested_join_table->nested_join->join_list);
5550 bool have_another = FALSE;
5551 table_map tables= 0;
5552 TABLE_LIST *table;
5553 DBUG_ASSERT(nested_join_table->nested_join);
5554
5555 while ((table= li++) || (have_another && (li=li2, have_another=FALSE,
5556 (table= li++))))
5557 {
5558 if (table->nested_join)
5559 {
5560 if (!table->join_cond())
5561 {
5562 /* It's a semi-join nest. Walk into it as if it wasn't a nest */
5563 have_another= TRUE;
5564 li2= li;
5565 li= List_iterator<TABLE_LIST>(table->nested_join->join_list);
5566 }
5567 else
5568 add_key_fields_for_nj(join, table, end, and_level, sargables);
5569 }
5570 else
5571 if (!table->join_cond())
5572 tables |= table->table->map;
5573 }
5574 if (nested_join_table->join_cond())
5575 add_key_fields(join, end, and_level, nested_join_table->join_cond(), tables,
5576 sargables);
5577 }
5578
5579
5580 /**
5581 Check for the presence of AGGFN(DISTINCT a) queries that may be subject
5582 to loose index scan.
5583
5584
5585 Check if the query is a subject to AGGFN(DISTINCT) using loose index scan
5586 (QUICK_GROUP_MIN_MAX_SELECT).
5587 Optionally (if out_args is supplied) will push the arguments of
5588 AGGFN(DISTINCT) to the list
5589
5590 Check for every COUNT(DISTINCT), AVG(DISTINCT) or
5591 SUM(DISTINCT). These can be resolved by Loose Index Scan as long
5592 as all the aggregate distinct functions refer to the same
5593 fields. Thus:
5594
5595 SELECT AGGFN(DISTINCT a, b), AGGFN(DISTINCT b, a)... => can use LIS
5596 SELECT AGGFN(DISTINCT a), AGGFN(DISTINCT a) ... => can use LIS
5597 SELECT AGGFN(DISTINCT a, b), AGGFN(DISTINCT a) ... => cannot use LIS
5598 SELECT AGGFN(DISTINCT a), AGGFN(DISTINCT b) ... => cannot use LIS
5599 etc.
5600
5601 @param join the join to check
5602 @param[out] out_args Collect the arguments of the aggregate functions
5603 to a list. We don't worry about duplicates as
5604 these will be sorted out later in
5605 get_best_group_min_max.
5606
5607 @return does the query qualify for indexed AGGFN(DISTINCT)
5608 @retval true it does
5609 @retval false AGGFN(DISTINCT) must apply distinct in it.
5610 */
5611
5612 bool
is_indexed_agg_distinct(JOIN * join,List<Item_field> * out_args)5613 is_indexed_agg_distinct(JOIN *join, List<Item_field> *out_args)
5614 {
5615 Item_sum **sum_item_ptr;
5616 bool result= false;
5617 Field_map first_aggdistinct_fields;
5618
5619 if (join->primary_tables > 1 || /* reference more than 1 table */
5620 join->select_distinct || /* or a DISTINCT */
5621 join->select_lex->olap == ROLLUP_TYPE) /* Check (B3) for ROLLUP */
5622 return false;
5623
5624 if (join->make_sum_func_list(join->all_fields, join->fields_list, true))
5625 return false;
5626
5627 for (sum_item_ptr= join->sum_funcs; *sum_item_ptr; sum_item_ptr++)
5628 {
5629 Item_sum *sum_item= *sum_item_ptr;
5630 Field_map cur_aggdistinct_fields;
5631 Item *expr;
5632 /* aggregate is not AGGFN(DISTINCT) or more than 1 argument to it */
5633 switch (sum_item->sum_func())
5634 {
5635 case Item_sum::MIN_FUNC:
5636 case Item_sum::MAX_FUNC:
5637 continue;
5638 case Item_sum::COUNT_DISTINCT_FUNC:
5639 break;
5640 case Item_sum::AVG_DISTINCT_FUNC:
5641 case Item_sum::SUM_DISTINCT_FUNC:
5642 if (sum_item->get_arg_count() == 1)
5643 break;
5644 /* fall through */
5645 default: return false;
5646 }
5647
5648 for (uint i= 0; i < sum_item->get_arg_count(); i++)
5649 {
5650 expr= sum_item->get_arg(i);
5651 /* The AGGFN(DISTINCT) arg is not an attribute? */
5652 if (expr->real_item()->type() != Item::FIELD_ITEM)
5653 return false;
5654
5655 Item_field* item= static_cast<Item_field*>(expr->real_item());
5656 if (out_args)
5657 out_args->push_back(item);
5658
5659 cur_aggdistinct_fields.set_bit(item->field->field_index);
5660 result= true;
5661 }
5662 /*
5663 If there are multiple aggregate functions, make sure that they all
5664 refer to exactly the same set of columns.
5665 */
5666 if (first_aggdistinct_fields.is_clear_all())
5667 first_aggdistinct_fields.merge(cur_aggdistinct_fields);
5668 else if (first_aggdistinct_fields != cur_aggdistinct_fields)
5669 return false;
5670 }
5671
5672 return result;
5673 }
5674
5675
5676 /**
5677 Print keys that were appended to join_tab->const_keys because they
5678 can be used for GROUP BY or DISTINCT to the optimizer trace.
5679
5680 @param trace The optimizer trace context we're adding info to
5681 @param join_tab The table the indices cover
5682 @param new_keys The keys that are considered useful because they can
5683 be used for GROUP BY or DISTINCT
5684 @param cause Zero-terminated string with reason for adding indices
5685 to const_keys
5686
5687 @see add_group_and_distinct_keys()
5688 */
trace_indices_added_group_distinct(Opt_trace_context * trace,const JOIN_TAB * join_tab,const key_map new_keys,const char * cause)5689 static void trace_indices_added_group_distinct(Opt_trace_context *trace,
5690 const JOIN_TAB *join_tab,
5691 const key_map new_keys,
5692 const char* cause)
5693 {
5694 #ifdef OPTIMIZER_TRACE
5695 if (likely(!trace->is_started()))
5696 return;
5697
5698 KEY *key_info= join_tab->table->key_info;
5699 key_map existing_keys= join_tab->const_keys;
5700 uint nbrkeys= join_tab->table->s->keys;
5701
5702 Opt_trace_object trace_summary(trace, "const_keys_added");
5703 {
5704 Opt_trace_array trace_key(trace,"keys");
5705 for (uint j= 0 ; j < nbrkeys ; j++)
5706 if (new_keys.is_set(j) && !existing_keys.is_set(j))
5707 trace_key.add_utf8(key_info[j].name);
5708 }
5709 trace_summary.add_alnum("cause", cause);
5710 #endif
5711 }
5712
5713
5714 /**
5715 Discover the indexes that might be used for GROUP BY or DISTINCT queries.
5716
5717 If the query has a GROUP BY clause, find all indexes that contain
5718 all GROUP BY fields, and add those indexes to join_tab->const_keys
5719 and join_tab->keys.
5720
5721 If the query has a DISTINCT clause, find all indexes that contain
5722 all SELECT fields, and add those indexes to join_tab->const_keys and
5723 join_tab->keys. This allows later on such queries to be processed by
5724 a QUICK_GROUP_MIN_MAX_SELECT.
5725
5726 Note that indexes that are not usable for resolving GROUP
5727 BY/DISTINCT may also be added in some corner cases. For example, an
5728 index covering 'a' and 'b' is not usable for the following query but
5729 is still added: "SELECT DISTINCT a+b FROM t1". This is not a big
5730 issue because a) although the optimizer will consider using the
  index, it will not choose it (so minor calculation cost added but not
5732 wrong result) and b) it applies only to corner cases.
5733
5734 @param join
5735 @param join_tab
5736
5737 @return
5738 None
5739 */
5740
5741 static void
add_group_and_distinct_keys(JOIN * join,JOIN_TAB * join_tab)5742 add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab)
5743 {
5744 List<Item_field> indexed_fields;
5745 List_iterator<Item_field> indexed_fields_it(indexed_fields);
5746 ORDER *cur_group;
5747 Item_field *cur_item;
5748 const char *cause;
5749
5750 if (join->group_list)
5751 { /* Collect all query fields referenced in the GROUP clause. */
5752 for (cur_group= join->group_list; cur_group; cur_group= cur_group->next)
5753 (*cur_group->item)->walk(&Item::collect_item_field_processor, 0,
5754 (uchar*) &indexed_fields);
5755 cause= "group_by";
5756 }
5757 else if (join->select_distinct)
5758 { /* Collect all query fields referenced in the SELECT clause. */
5759 List<Item> &select_items= join->fields_list;
5760 List_iterator<Item> select_items_it(select_items);
5761 Item *item;
5762 while ((item= select_items_it++))
5763 item->walk(&Item::collect_item_field_processor, 0,
5764 (uchar*) &indexed_fields);
5765 cause= "distinct";
5766 }
5767 else if (join->tmp_table_param.sum_func_count &&
5768 is_indexed_agg_distinct(join, &indexed_fields))
5769 {
5770 /*
5771 SELECT list with AGGFN(distinct col). The query qualifies for
5772 loose index scan, and is_indexed_agg_distinct() has already
5773 collected all referenced fields into indexed_fields.
5774 */
5775 join->sort_and_group= 1;
5776 cause= "indexed_distinct_aggregate";
5777 }
5778 else
5779 return;
5780
5781 if (indexed_fields.elements == 0)
5782 return;
5783
5784 key_map possible_keys;
5785 possible_keys.set_all();
5786
5787 /* Intersect the keys of all group fields. */
5788 while ((cur_item= indexed_fields_it++))
5789 {
5790 if (cur_item->used_tables() != join_tab->table->map)
5791 {
5792 /*
5793 Doing GROUP BY or DISTINCT on a field in another table so no
5794 index in this table is usable
5795 */
5796 return;
5797 }
5798 else
5799 possible_keys.intersect(cur_item->field->part_of_key);
5800 }
5801
5802 /*
5803 At this point, possible_keys has key bits set only for usable
5804 indexes because indexed_fields is non-empty and if any of the
5805 fields belong to a different table the function would exit in the
5806 loop above.
5807 */
5808
5809 if (!possible_keys.is_clear_all() &&
5810 !possible_keys.is_subset(join_tab->const_keys))
5811 {
5812 trace_indices_added_group_distinct(&join->thd->opt_trace, join_tab,
5813 possible_keys, cause);
5814 join_tab->const_keys.merge(possible_keys);
5815 join_tab->keys.merge(possible_keys);
5816 }
5817
5818 }
5819
5820 /**
5821 Update keyuse array with all possible keys we can use to fetch rows.
5822
5823 @param thd
5824 @param[out] keyuse Put here ordered array of Key_use structures
5825 @param join_tab Array in tablenr_order
5826 @param tables Number of tables in join
  @param       cond           WHERE condition (note that the function analyzes
                              join_tab[i]->join_cond() too)
  @param       cond_equal     Not referenced by the body of this function
5829 @param normal_tables Tables not inner w.r.t some outer join (ones
5830 for which we can make ref access based the WHERE
5831 clause)
5832 @param select_lex current SELECT
5833 @param[out] sargables Array of found sargable candidates
5834
5835 @retval
5836 0 OK
5837 @retval
5838 1 Out of memory.
5839 */
5840
5841 static bool
update_ref_and_keys(THD * thd,Key_use_array * keyuse,JOIN_TAB * join_tab,uint tables,Item * cond,COND_EQUAL * cond_equal,table_map normal_tables,SELECT_LEX * select_lex,SARGABLE_PARAM ** sargables)5842 update_ref_and_keys(THD *thd, Key_use_array *keyuse,JOIN_TAB *join_tab,
5843 uint tables, Item *cond, COND_EQUAL *cond_equal,
5844 table_map normal_tables, SELECT_LEX *select_lex,
5845 SARGABLE_PARAM **sargables)
5846 {
5847 uint and_level,i,found_eq_constant;
5848 Key_field *key_fields, *end, *field;
5849 uint sz;
5850 uint m= max(select_lex->max_equal_elems, 1U);
5851
5852 /*
5853 We use the same piece of memory to store both Key_field
5854 and SARGABLE_PARAM structure.
5855 Key_field values are placed at the beginning this memory
5856 while SARGABLE_PARAM values are put at the end.
5857 All predicates that are used to fill arrays of Key_field
5858 and SARGABLE_PARAM structures have at most 2 arguments
5859 except BETWEEN predicates that have 3 arguments and
5860 IN predicates.
5861 This any predicate if it's not BETWEEN/IN can be used
5862 directly to fill at most 2 array elements, either of Key_field
5863 or SARGABLE_PARAM type. For a BETWEEN predicate 3 elements
5864 can be filled as this predicate is considered as
5865 saragable with respect to each of its argument.
5866 An IN predicate can require at most 1 element as currently
5867 it is considered as sargable only for its first argument.
5868 Multiple equality can add elements that are filled after
5869 substitution of field arguments by equal fields. There
5870 can be not more than select_lex->max_equal_elems such
5871 substitutions.
5872 */
5873 sz= max(sizeof(Key_field), sizeof(SARGABLE_PARAM)) *
5874 (((select_lex->cond_count + 1) * 2 +
5875 select_lex->between_count) * m + 1);
5876 if (!(key_fields=(Key_field*) thd->alloc(sz)))
5877 return TRUE; /* purecov: inspected */
5878 and_level= 0;
5879 field= end= key_fields;
5880 *sargables= (SARGABLE_PARAM *) key_fields +
5881 (sz - sizeof((*sargables)[0].field))/sizeof(SARGABLE_PARAM);
5882 /* set a barrier for the array of SARGABLE_PARAM */
5883 (*sargables)[0].field= 0;
5884
5885 if (cond)
5886 {
5887 add_key_fields(join_tab->join, &end, &and_level, cond, normal_tables,
5888 sargables);
5889 for (Key_field *fld= field; fld != end ; fld++)
5890 {
5891 /* Mark that we can optimize LEFT JOIN */
5892 if (fld->val->type() == Item::NULL_ITEM &&
5893 !fld->field->real_maybe_null())
5894 {
5895 /*
5896 Example:
5897 SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.a WHERE t2.a IS NULL;
5898 this just wants rows of t1 where t1.a does not exist in t2.
5899 */
5900 fld->field->table->reginfo.not_exists_optimize=1;
5901 }
5902 }
5903 }
5904
5905 for (i=0 ; i < tables ; i++)
5906 {
5907 /*
5908 Block the creation of keys for inner tables of outer joins.
5909 Here only the outer joins that can not be converted to
5910 inner joins are left and all nests that can be eliminated
5911 are flattened.
5912 In the future when we introduce conditional accesses
5913 for inner tables in outer joins these keys will be taken
5914 into account as well.
5915 */
5916 if (*join_tab[i].on_expr_ref)
5917 add_key_fields(join_tab->join, &end, &and_level,
5918 *join_tab[i].on_expr_ref,
5919 join_tab[i].table->map, sargables);
5920 }
5921
5922 /* Process ON conditions for the nested joins */
5923 {
5924 List_iterator<TABLE_LIST> li(*join_tab->join->join_list);
5925 TABLE_LIST *table;
5926 while ((table= li++))
5927 {
5928 if (table->nested_join)
5929 add_key_fields_for_nj(join_tab->join, table, &end, &and_level,
5930 sargables);
5931 }
5932 }
5933
5934 /* Generate keys descriptions for derived tables */
5935 if (select_lex->materialized_table_count)
5936 {
5937 if (select_lex->join->generate_derived_keys())
5938 return true;
5939 }
5940 /* fill keyuse with found key parts */
5941 for ( ; field != end ; field++)
5942 {
5943 if (add_key_part(keyuse,field))
5944 return true;
5945 }
5946
5947 if (select_lex->ftfunc_list->elements)
5948 {
5949 if (add_ft_keys(keyuse,join_tab,cond,normal_tables))
5950 return true;
5951 }
5952
5953 /*
5954 Sort the array of possible keys and remove the following key parts:
5955 - ref if there is a keypart which is a ref and a const.
5956 (e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d,
5957 then we skip the key part corresponding to b=t2.d)
5958 - keyparts without previous keyparts
5959 (e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is
5960 used in the query, we drop the partial key parts from consideration).
5961 Special treatment for ft-keys.
5962 */
5963 if (!keyuse->empty())
5964 {
5965 Key_use *save_pos, *use;
5966
5967 my_qsort(keyuse->begin(), keyuse->size(), keyuse->element_size(),
5968 reinterpret_cast<qsort_cmp>(sort_keyuse));
5969
5970 const Key_use key_end(NULL, NULL, 0, 0, 0, 0, 0, 0, false, NULL, 0);
5971 if (keyuse->push_back(key_end)) // added for easy testing
5972 return TRUE;
5973
5974 use= save_pos= keyuse->begin();
5975 const Key_use *prev= &key_end;
5976 found_eq_constant=0;
5977 for (i=0 ; i < keyuse->size()-1 ; i++,use++)
5978 {
5979 if (!use->used_tables && use->optimize != KEY_OPTIMIZE_REF_OR_NULL)
5980 use->table->const_key_parts[use->key]|= use->keypart_map;
5981 if (use->keypart != FT_KEYPART)
5982 {
5983 if (use->key == prev->key && use->table == prev->table)
5984 {
5985 if (prev->keypart+1 < use->keypart ||
5986 (prev->keypart == use->keypart && found_eq_constant))
5987 continue; /* remove */
5988 }
5989 else if (use->keypart != 0) // First found must be 0
5990 continue;
5991 }
5992
5993 #if defined(__GNUC__) && !MY_GNUC_PREREQ(4,4)
5994 /*
5995 Old gcc used a memcpy(), which is undefined if save_pos==use:
5996 http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19410
5997 http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39480
5998 */
5999 if (save_pos != use)
6000 #endif
6001 *save_pos= *use;
6002 prev=use;
6003 found_eq_constant= !use->used_tables;
6004 /* Save ptr to first use */
6005 if (!use->table->reginfo.join_tab->keyuse)
6006 use->table->reginfo.join_tab->keyuse=save_pos;
6007 use->table->reginfo.join_tab->checked_keys.set_bit(use->key);
6008 save_pos++;
6009 }
6010 i= (uint) (save_pos - keyuse->begin());
6011 keyuse->at(i) = key_end;
6012 keyuse->chop(i);
6013 }
6014 print_keyuse_array(&thd->opt_trace, keyuse);
6015
6016 return false;
6017 }
6018
6019
6020 /**
6021 Create a keyuse array for a table with a primary key.
6022 To be used when creating a materialized temporary table.
6023
6024 @param thd THD pointer, for memory allocation
6025 @param table Table object representing table
  @param keyparts    Number of key parts in the primary key
  @param fields      Array of at least 'keyparts' field items; element i
                     supplies the field for key part i
  @param outer_exprs List of items used for key lookup
6028
6029 @return Pointer to created keyuse array, or NULL if error
6030 */
create_keyuse_for_table(THD * thd,TABLE * table,uint keyparts,Item_field ** fields,List<Item> outer_exprs)6031 Key_use_array *create_keyuse_for_table(THD *thd, TABLE *table, uint keyparts,
6032 Item_field **fields,
6033 List<Item> outer_exprs)
6034 {
6035 void *mem= thd->alloc(sizeof(Key_use_array));
6036 if (!mem)
6037 return NULL;
6038 Key_use_array *keyuses= new (mem) Key_use_array(thd->mem_root);
6039
6040 List_iterator<Item> outer_expr(outer_exprs);
6041
6042 for (uint keypartno= 0; keypartno < keyparts; keypartno++)
6043 {
6044 Item *const item= outer_expr++;
6045 Key_field key_field(fields[keypartno]->field, item, 0, 0, true,
6046 // null_rejecting must be true for field items only,
6047 // add_not_null_conds() is incapable of handling
6048 // other item types.
6049 (item->type() == Item::FIELD_ITEM),
6050 NULL, UINT_MAX);
6051 if (add_key_part(keyuses, &key_field))
6052 return NULL;
6053 }
6054 const Key_use key_end(NULL, NULL, 0, 0, 0, 0, 0, 0, false, NULL, 0);
6055 if (keyuses->push_back(key_end)) // added for easy testing
6056 return NULL;
6057
6058 return keyuses;
6059 }
6060
6061
6062 /** Save const tables first as used tables. */
6063
6064 static void
set_position(JOIN * join,uint idx,JOIN_TAB * table,Key_use * key)6065 set_position(JOIN *join, uint idx, JOIN_TAB *table, Key_use *key)
6066 {
6067 join->positions[idx].table= table;
6068 join->positions[idx].key=key;
6069 join->positions[idx].records_read=1.0; /* This is a const table */
6070 join->positions[idx].ref_depend_map= 0;
6071
6072 join->positions[idx].loosescan_key= MAX_KEY; /* Not a LooseScan */
6073 join->positions[idx].sj_strategy= SJ_OPT_NONE;
6074 join->positions[idx].use_join_buffer= FALSE;
6075
6076 /* Move the const table as down as possible in best_ref */
6077 JOIN_TAB **pos=join->best_ref+idx+1;
6078 JOIN_TAB *next=join->best_ref[idx];
6079 for (;next != table ; pos++)
6080 {
6081 JOIN_TAB *tmp=pos[0];
6082 pos[0]=next;
6083 next=tmp;
6084 }
6085 join->best_ref[idx]=table;
6086 }
6087
6088
6089 /**
6090 Fill in outer join related info for the execution plan structure.
6091
6092 For each outer join operation left after simplification of the
6093 original query the function set up the following pointers in the linear
6094 structure join->join_tab representing the selected execution plan.
6095 The first inner table t0 for the operation is set to refer to the last
6096 inner table tk through the field t0->last_inner.
6097 Any inner table ti for the operation are set to refer to the first
6098 inner table ti->first_inner.
6099 The first inner table t0 for the operation is set to refer to the
6100 first inner table of the embedding outer join operation, if there is any,
6101 through the field t0->first_upper.
6102 The on expression for the outer join operation is attached to the
6103 corresponding first inner table through the field t0->on_expr_ref.
6104 Here ti are structures of the JOIN_TAB type.
6105
6106 EXAMPLE. For the query:
6107 @code
6108 SELECT * FROM t1
6109 LEFT JOIN
6110 (t2, t3 LEFT JOIN t4 ON t3.a=t4.a)
6111 ON (t1.a=t2.a AND t1.b=t3.b)
6112 WHERE t1.c > 5,
6113 @endcode
6114
6115 given the execution plan with the table order t1,t2,t3,t4
6116 is selected, the following references will be set;
6117 t4->last_inner=[t4], t4->first_inner=[t4], t4->first_upper=[t2]
6118 t2->last_inner=[t4], t2->first_inner=t3->first_inner=[t2],
6119 on expression (t1.a=t2.a AND t1.b=t3.b) will be attached to
6120 *t2->on_expr_ref, while t3.a=t4.a will be attached to *t4->on_expr_ref.
6121
6122 @param join reference to the info fully describing the query
6123
6124 @note
6125 The function assumes that the simplification procedure has been
6126 already applied to the join query (see simplify_joins).
6127 This function can be called only after the execution plan
6128 has been chosen.
6129 */
6130
6131 static void
make_outerjoin_info(JOIN * join)6132 make_outerjoin_info(JOIN *join)
6133 {
6134 DBUG_ENTER("make_outerjoin_info");
6135
6136 DBUG_ASSERT(join->outer_join);
6137
6138 for (uint i= join->const_tables; i < join->tables; i++)
6139 {
6140 JOIN_TAB *const tab= join->join_tab + i;
6141 TABLE *const table= tab->table;
6142
6143 if (!table)
6144 continue;
6145
6146 TABLE_LIST *const tbl= table->pos_in_table_list;
6147
6148 if (tbl->outer_join)
6149 {
6150 /*
6151 Table tab is the only one inner table for outer join.
6152 (Like table t4 for the table reference t3 LEFT JOIN t4 ON t3.a=t4.a
6153 is in the query above.)
6154 */
6155 tab->last_inner= tab->first_inner= tab;
6156 tab->on_expr_ref= tbl->join_cond_ref();
6157 tab->cond_equal= tbl->cond_equal;
6158 /*
6159 If this outer join nest is embedded in another join nest,
6160 link the join-tabs:
6161 */
6162 TABLE_LIST *const outer_join_nest= tbl->outer_join_nest();
6163 if (outer_join_nest)
6164 tab->first_upper= outer_join_nest->nested_join->first_nested;
6165 }
6166 for (TABLE_LIST *embedding= tbl->embedding;
6167 embedding;
6168 embedding= embedding->embedding)
6169 {
6170 // Ignore join nests that are not outer join nests:
6171 if (!embedding->join_cond())
6172 continue;
6173 NESTED_JOIN *const nested_join= embedding->nested_join;
6174 if (!nested_join->nj_counter)
6175 {
6176 /*
6177 Table tab is the first inner table for nested_join.
6178 Save reference to it in the nested join structure.
6179 */
6180 nested_join->first_nested= tab;
6181 tab->on_expr_ref= embedding->join_cond_ref();
6182 tab->cond_equal= tbl->cond_equal;
6183
6184 TABLE_LIST *const outer_join_nest= embedding->outer_join_nest();
6185 if (outer_join_nest)
6186 tab->first_upper= outer_join_nest->nested_join->first_nested;
6187 }
6188 if (!tab->first_inner)
6189 tab->first_inner= nested_join->first_nested;
6190 if (++nested_join->nj_counter < nested_join->nj_total)
6191 break;
6192 /* Table tab is the last inner table for nested join. */
6193 nested_join->first_nested->last_inner= tab;
6194 }
6195 }
6196 DBUG_VOID_RETURN;
6197 }
6198
6199 /**
6200 Build a predicate guarded by match variables for embedding outer joins.
6201 The function recursively adds guards for predicate cond
  ascending from tab to the first inner table next embedding
6203 nested outer join and so on until it reaches root_tab
6204 (root_tab can be 0).
6205
6206 @param tab the first inner table for most nested outer join
6207 @param cond the predicate to be guarded (must be set)
6208 @param root_tab the first inner table to stop
6209
6210 @return
6211 - pointer to the guarded predicate, if success
6212 - 0, otherwise
6213 */
6214
6215 static Item*
add_found_match_trig_cond(JOIN_TAB * tab,Item * cond,JOIN_TAB * root_tab)6216 add_found_match_trig_cond(JOIN_TAB *tab, Item *cond, JOIN_TAB *root_tab)
6217 {
6218 Item *tmp;
6219 DBUG_ASSERT(cond != 0);
6220 if (tab == root_tab)
6221 return cond;
6222 if ((tmp= add_found_match_trig_cond(tab->first_upper, cond, root_tab)))
6223 tmp= new Item_func_trig_cond(tmp, &tab->found, tab,
6224 Item_func_trig_cond::FOUND_MATCH);
6225 if (tmp)
6226 {
6227 tmp->quick_fix_field();
6228 tmp->update_used_tables();
6229 }
6230 return tmp;
6231 }
6232
6233
6234 /**
6235 Local helper function for make_join_select().
6236
6237 Push down conditions from all on expressions.
6238 Each of these conditions are guarded by a variable
  that turns it off just before null complemented row for
6240 outer joins is formed. Thus, the condition from an
6241 'on expression' are guaranteed not to be checked for
6242 the null complemented row.
6243 */
pushdown_on_conditions(JOIN * join,JOIN_TAB * last_tab)6244 static bool pushdown_on_conditions(JOIN* join, JOIN_TAB *last_tab)
6245 {
6246 DBUG_ENTER("pushdown_on_conditions");
6247
6248 /* First push down constant conditions from on expressions */
6249 for (JOIN_TAB *join_tab= join->join_tab+join->const_tables;
6250 join_tab < join->join_tab+join->tables ; join_tab++)
6251 {
6252 if (join_tab->on_expr_ref && *join_tab->on_expr_ref)
6253 {
6254 JOIN_TAB *cond_tab= join_tab->first_inner;
6255 Item *tmp_cond= make_cond_for_table(*join_tab->on_expr_ref,
6256 join->const_table_map,
6257 (table_map) 0, 0);
6258 if (!tmp_cond)
6259 continue;
6260 tmp_cond= new
6261 Item_func_trig_cond(tmp_cond, &cond_tab->not_null_compl, cond_tab,
6262 Item_func_trig_cond::IS_NOT_NULL_COMPL);
6263 if (!tmp_cond)
6264 DBUG_RETURN(true);
6265 tmp_cond->quick_fix_field();
6266
6267 if (cond_tab->and_with_jt_and_sel_condition(tmp_cond, __LINE__))
6268 DBUG_RETURN(true);
6269 }
6270 }
6271
6272 JOIN_TAB *first_inner_tab= last_tab->first_inner;
6273
6274 /* Push down non-constant conditions from on expressions */
6275 while (first_inner_tab && first_inner_tab->last_inner == last_tab)
6276 {
6277 /*
6278 Table last_tab is the last inner table of an outer join.
6279 An on expression is always attached to it.
6280 */
6281 Item *on_expr= *first_inner_tab->on_expr_ref;
6282
6283 for (JOIN_TAB *join_tab= join->join_tab+join->const_tables;
6284 join_tab <= last_tab ; join_tab++)
6285 {
6286 table_map prefix_tables= join_tab->prefix_tables();
6287 table_map added_tables= join_tab->added_tables();
6288
6289 if (join_tab == last_tab)
6290 {
6291 /*
6292 Need RAND_TABLE_BIT on the last inner table, in case there is a
6293 non-deterministic function in the join condition.
6294 (RAND_TABLE_BIT is set for the last table of the join plan,
6295 but this is not sufficient for join conditions, which may have a
6296 last inner table that is ahead of the last table of the join plan).
6297 */
6298 prefix_tables|= RAND_TABLE_BIT;
6299 added_tables|= RAND_TABLE_BIT;
6300 }
6301 Item *tmp_cond= make_cond_for_table(on_expr, prefix_tables, added_tables,
6302 false);
6303 if (!tmp_cond)
6304 continue;
6305
6306 JOIN_TAB *cond_tab=
6307 join_tab < first_inner_tab ? first_inner_tab : join_tab;
6308 /*
6309 First add the guards for match variables of
6310 all embedding outer join operations.
6311 */
6312 if (!(tmp_cond= add_found_match_trig_cond(cond_tab->first_inner,
6313 tmp_cond,
6314 first_inner_tab)))
6315 DBUG_RETURN(1);
6316 /*
6317 Now add the guard turning the predicate off for
6318 the null complemented row.
6319 */
6320 tmp_cond=
6321 new Item_func_trig_cond(tmp_cond, &first_inner_tab->not_null_compl,
6322 first_inner_tab,
6323 Item_func_trig_cond::IS_NOT_NULL_COMPL);
6324 if (!tmp_cond)
6325 DBUG_RETURN(true);
6326 tmp_cond->quick_fix_field();
6327
6328 /* Add the predicate to other pushed down predicates */
6329 if (cond_tab->and_with_jt_and_sel_condition(tmp_cond, __LINE__))
6330 DBUG_RETURN(true);
6331 }
6332 first_inner_tab= first_inner_tab->first_upper;
6333 }
6334 DBUG_RETURN(0);
6335 }
6336
6337
6338 /*****************************************************************************
6339 Remove calculation with tables that aren't yet read. Remove also tests
6340 against fields that are read through key where the table is not a
6341 outer join table.
6342 We can't remove tests that are made against columns which are stored
6343 in sorted order.
6344 *****************************************************************************/
6345
6346 static Item *
part_of_refkey(TABLE * table,Field * field)6347 part_of_refkey(TABLE *table,Field *field)
6348 {
6349 if (!table->reginfo.join_tab)
6350 return NULL; // field from outer non-select (UPDATE,...)
6351
6352 uint ref_parts=table->reginfo.join_tab->ref.key_parts;
6353 if (ref_parts)
6354 {
6355 if (table->reginfo.join_tab->has_guarded_conds())
6356 return NULL;
6357
6358 const KEY_PART_INFO *key_part=
6359 table->key_info[table->reginfo.join_tab->ref.key].key_part;
6360
6361 for (uint part=0 ; part < ref_parts ; part++,key_part++)
6362 if (field->eq(key_part->field) &&
6363 !(key_part->key_part_flag & HA_PART_KEY_SEG))
6364 return table->reginfo.join_tab->ref.items[part];
6365 }
6366 return NULL;
6367 }
6368
6369
6370 /**
6371 @return
6372 1 if right_item is used removable reference key on left_item
6373
6374 @note see comments in make_cond_for_table_from_pred() about careful
6375 usage/modifications of test_if_ref().
6376 */
6377
test_if_ref(Item * root_cond,Item_field * left_item,Item * right_item)6378 static bool test_if_ref(Item *root_cond,
6379 Item_field *left_item,Item *right_item)
6380 {
6381 Field *field=left_item->field;
6382 JOIN_TAB *join_tab= field->table->reginfo.join_tab;
6383 // No need to change const test
6384 if (!field->table->const_table && join_tab &&
6385 (!join_tab->first_inner ||
6386 *join_tab->first_inner->on_expr_ref == root_cond) &&
6387 /* "ref_or_null" implements "x=y or x is null", not "x=y" */
6388 (join_tab->type != JT_REF_OR_NULL))
6389 {
6390 Item *ref_item=part_of_refkey(field->table,field);
6391 if (ref_item && ref_item->eq(right_item,1))
6392 {
6393 right_item= right_item->real_item();
6394 if (right_item->type() == Item::FIELD_ITEM)
6395 return (field->eq_def(((Item_field *) right_item)->field));
6396 /* remove equalities injected by IN->EXISTS transformation */
6397 else if (right_item->type() == Item::CACHE_ITEM)
6398 return ((Item_cache *)right_item)->eq_def (field);
6399 if (right_item->const_item() && !(right_item->is_null()))
6400 {
6401 /*
6402 We can remove binary fields and numerical fields except float,
6403 as float comparison isn't 100 % secure
6404 We have to keep normal strings to be able to check for end spaces
6405
6406 sergefp: the above seems to be too restrictive. Counterexample:
6407 create table t100 (v varchar(10), key(v)) default charset=latin1;
6408 insert into t100 values ('a'),('a ');
6409 explain select * from t100 where v='a';
6410 The EXPLAIN shows 'using Where'. Running the query returns both
6411 rows, so it seems there are no problems with endspace in the most
6412 frequent case?
6413 */
6414 if (field->binary() &&
6415 field->real_type() != MYSQL_TYPE_STRING &&
6416 field->real_type() != MYSQL_TYPE_VARCHAR &&
6417 (field->type() != MYSQL_TYPE_FLOAT || field->decimals() == 0))
6418 {
6419 return !right_item->save_in_field_no_warnings(field, true);
6420 }
6421 }
6422 }
6423 }
6424 return 0; // keep test
6425 }
6426
6427 /**
6428 Extract a condition that can be checked after reading given table
6429
6430 @param cond Condition to analyze
6431 @param tables Tables for which "current field values" are available
6432 @param used_table Table that we're extracting the condition for (may
6433 also include PSEUDO_TABLE_BITS, and may be zero)
6434 @param exclude_expensive_cond Do not push expensive conditions
6435
6436 @retval <>NULL Generated condition
6437 @retval =NULL Already checked, OR error
6438
6439 @details
6440 Extract the condition that can be checked after reading the table
6441 specified in 'used_table', given that current-field values for tables
6442 specified in 'tables' bitmap are available.
6443 If 'used_table' is 0
6444 - extract conditions for all tables in 'tables'.
6445 - extract conditions are unrelated to any tables
6446 in the same query block/level(i.e. conditions
6447 which have used_tables == 0).
6448
6449 The function assumes that
6450 - Constant parts of the condition has already been checked.
6451 - Condition that could be checked for tables in 'tables' has already
6452 been checked.
6453
6454 The function takes into account that some parts of the condition are
6455 guaranteed to be true by employed 'ref' access methods (the code that
6456 does this is located at the end, search down for "EQ_FUNC").
6457
6458 @note
6459 make_cond_for_info_schema() uses an algorithm similar to
6460 make_cond_for_table().
6461 */
6462
6463 /**
6464 Destructively replaces a sub-condition inside a condition tree. The
6465 parse tree is also altered.
6466
6467 @note Because of current requirements for semijoin flattening, we do not
6468 need to recurse here, hence this function will only examine the top-level
6469 AND conditions. (see JOIN::prepare, comment starting with "Check if the
6470 subquery predicate can be executed via materialization".)
6471
6472 @param join The top-level query.
6473
6474 @param tree Must be the handle to the top level condition. This is needed
6475 when the top-level condition changes.
6476
6477 @param old_cond The condition to be replaced.
6478
6479 @param new_cond The condition to be substituted.
6480
6481 @param do_fix_fields If true, Item::fix_fields(THD*, Item**) is called for
6482 the new condition.
6483
6484 @return error status
6485
6486 @retval true If there was an error.
6487 @retval false If successful.
6488 */
6489
replace_subcondition(JOIN * join,Item ** tree,Item * old_cond,Item * new_cond,bool do_fix_fields)6490 static bool replace_subcondition(JOIN *join, Item **tree,
6491 Item *old_cond, Item *new_cond,
6492 bool do_fix_fields)
6493 {
6494 if (*tree == old_cond)
6495 {
6496 *tree= new_cond;
6497 if (do_fix_fields && new_cond->fix_fields(join->thd, tree))
6498 return TRUE;
6499 join->select_lex->where= *tree;
6500 return FALSE;
6501 }
6502 else if ((*tree)->type() == Item::COND_ITEM)
6503 {
6504 List_iterator<Item> li(*((Item_cond*)(*tree))->argument_list());
6505 Item *item;
6506 while ((item= li++))
6507 {
6508 if (item == old_cond)
6509 {
6510 li.replace(new_cond);
6511 if (do_fix_fields && new_cond->fix_fields(join->thd, li.ref()))
6512 return TRUE;
6513 return FALSE;
6514 }
6515 }
6516 }
6517 else
6518 // If we came here it means there were an error during prerequisites check.
6519 DBUG_ASSERT(FALSE);
6520
6521 return TRUE;
6522 }
6523
6524
subq_sj_candidate_cmp(Item_exists_subselect * const * el1,Item_exists_subselect * const * el2)6525 static int subq_sj_candidate_cmp(Item_exists_subselect* const *el1,
6526 Item_exists_subselect* const *el2)
6527 {
6528 /*
6529 Remove this assert when we support semijoin on non-IN subqueries.
6530 */
6531 DBUG_ASSERT((*el1)->substype() == Item_subselect::IN_SUBS &&
6532 (*el2)->substype() == Item_subselect::IN_SUBS);
6533 return ((*el1)->sj_convert_priority < (*el2)->sj_convert_priority) ? 1 :
6534 ( ((*el1)->sj_convert_priority == (*el2)->sj_convert_priority)? 0 : -1);
6535 }
6536
6537
fix_list_after_tbl_changes(st_select_lex * parent_select,st_select_lex * removed_select,List<TABLE_LIST> * tlist)6538 static void fix_list_after_tbl_changes(st_select_lex *parent_select,
6539 st_select_lex *removed_select,
6540 List<TABLE_LIST> *tlist)
6541 {
6542 List_iterator<TABLE_LIST> it(*tlist);
6543 TABLE_LIST *table;
6544 while ((table= it++))
6545 {
6546 if (table->join_cond())
6547 table->join_cond()->fix_after_pullout(parent_select, removed_select);
6548 if (table->nested_join)
6549 fix_list_after_tbl_changes(parent_select, removed_select,
6550 &table->nested_join->join_list);
6551 }
6552 }
6553
6554
6555 /**
6556 Convert a subquery predicate into a TABLE_LIST semi-join nest
6557
6558 @param parent_join Parent join, which has subq_pred in its WHERE/ON clause.
6559 @param subq_pred Subquery predicate to be converted.
6560 This is either an IN, =ANY or EXISTS predicate.
6561
6562 @retval FALSE OK
6563 @retval TRUE Error
6564
6565 @details
6566
6567 The following transformations are performed:
6568
  1. IN/=ANY predicates of the form:

  SELECT ...
  FROM ot1 ... otN
  WHERE (oe1, ... oeM) IN (SELECT ie1, ..., ieM
                           FROM it1 ... itK
                          [WHERE inner-cond])
   [AND outer-cond]
6577 [GROUP BY ...] [HAVING ...] [ORDER BY ...]
6578
6579 are transformed into:
6580
6581 SELECT ...
6582 FROM (ot1 ... otN) SJ (it1 ... itK)
6583 ON (oe1, ... oeM) = (ie1, ..., ieM)
6584 [AND inner-cond]
6585 [WHERE outer-cond]
6586 [GROUP BY ...] [HAVING ...] [ORDER BY ...]
6587
6588 Notice that the inner-cond may contain correlated and non-correlated
6589 expressions. Further transformations will analyze and break up such
6590 expressions.
6591
6592 Prepared Statements: the transformation is permanent:
6593 - Changes in TABLE_LIST structures are naturally permanent
6594 - Item tree changes are performed on statement MEM_ROOT:
6595 = we activate statement MEM_ROOT
6596 = this function is called before the first fix_prepare_information call.
6597
6598 This is intended because the criteria for subquery-to-sj conversion remain
6599 constant for the lifetime of the Prepared Statement.
6600 */
6601
convert_subquery_to_semijoin(JOIN * parent_join,Item_exists_subselect * subq_pred)6602 static bool convert_subquery_to_semijoin(JOIN *parent_join,
6603 Item_exists_subselect *subq_pred)
6604 {
6605 SELECT_LEX *parent_lex= parent_join->select_lex;
6606 TABLE_LIST *emb_tbl_nest= NULL;
6607 List<TABLE_LIST> *emb_join_list= &parent_lex->top_join_list;
6608 THD *thd= parent_join->thd;
6609 DBUG_ENTER("convert_subquery_to_semijoin");
6610
6611 DBUG_ASSERT(subq_pred->substype() == Item_subselect::IN_SUBS);
6612
6613 /*
6614 Find out where to insert the semi-join nest and the generated condition.
6615
6616 For t1 LEFT JOIN t2, embedding_join_nest will be t2.
6617 Note that t2 may be a simple table or may itself be a join nest
6618 (e.g. in the case t1 LEFT JOIN (t2 JOIN t3))
6619 */
6620 if ((void*)subq_pred->embedding_join_nest != NULL)
6621 {
6622 if (subq_pred->embedding_join_nest->nested_join)
6623 {
6624 /*
6625 We're dealing with
6626
6627 ... [LEFT] JOIN ( ... ) ON (subquery AND condition) ...
6628
6629 The sj-nest will be inserted into the brackets nest.
6630 */
6631 emb_tbl_nest= subq_pred->embedding_join_nest;
6632 emb_join_list= &emb_tbl_nest->nested_join->join_list;
6633 }
6634 else if (!subq_pred->embedding_join_nest->outer_join)
6635 {
6636 /*
6637 We're dealing with
6638
6639 ... INNER JOIN tblX ON (subquery AND condition) ...
6640
6641 The sj-nest will be tblX's "sibling", i.e. another child of its
6642 parent. This is ok because tblX is joined as an inner join.
6643 */
6644 emb_tbl_nest= subq_pred->embedding_join_nest->embedding;
6645 if (emb_tbl_nest)
6646 emb_join_list= &emb_tbl_nest->nested_join->join_list;
6647 }
6648 else if (!subq_pred->embedding_join_nest->nested_join)
6649 {
6650 TABLE_LIST *outer_tbl= subq_pred->embedding_join_nest;
6651 /*
6652 We're dealing with
6653
6654 ... LEFT JOIN tbl ON (on_expr AND subq_pred) ...
6655
6656 we'll need to convert it into:
6657
6658 ... LEFT JOIN ( tbl SJ (subq_tables) ) ON (on_expr AND subq_pred) ...
6659 | |
6660 |<----- wrap_nest ---->|
6661
6662 Q: other subqueries may be pointing to this element. What to do?
6663 A1: simple solution: copy *subq_pred->embedding_join_nest= *parent_nest.
6664 But we'll need to fix other pointers.
6665 A2: Another way: have TABLE_LIST::next_ptr so the following
6666 subqueries know the table has been nested.
6667 A3: changes in the TABLE_LIST::outer_join will make everything work
6668 automatically.
6669 */
6670 TABLE_LIST *const wrap_nest=
6671 TABLE_LIST::new_nested_join(thd->mem_root, "(sj-wrap)",
6672 outer_tbl->embedding, outer_tbl->join_list,
6673 parent_lex);
6674 if (wrap_nest == NULL)
6675 DBUG_RETURN(true);
6676
6677 wrap_nest->nested_join->join_list.push_back(outer_tbl);
6678
6679 outer_tbl->embedding= wrap_nest;
6680 outer_tbl->join_list= &wrap_nest->nested_join->join_list;
6681
6682 /*
6683 wrap_nest will take place of outer_tbl, so move the outer join flag
6684 and join condition.
6685 */
6686 wrap_nest->outer_join= outer_tbl->outer_join;
6687 outer_tbl->outer_join= 0;
6688
6689 wrap_nest->set_join_cond(outer_tbl->join_cond());
6690 outer_tbl->set_join_cond(NULL);
6691
6692 List_iterator<TABLE_LIST> li(*wrap_nest->join_list);
6693 TABLE_LIST *tbl;
6694 while ((tbl= li++))
6695 {
6696 if (tbl == outer_tbl)
6697 {
6698 li.replace(wrap_nest);
6699 break;
6700 }
6701 }
6702
6703 /*
6704 outer_tbl is replaced by wrap_nest.
6705 For subselects, update embedding_join_nest to point to wrap_nest
6706 instead of outer_tbl.
6707 */
6708 for (Item_exists_subselect **subquery= parent_join->sj_subselects.begin();
6709 subquery < parent_join->sj_subselects.end();
6710 subquery++)
6711 {
6712 if ((*subquery)->embedding_join_nest == outer_tbl)
6713 (*subquery)->embedding_join_nest= wrap_nest;
6714 }
6715
6716 /*
6717 Ok now wrap_nest 'contains' outer_tbl and we're ready to add the
6718 semi-join nest into it
6719 */
6720 emb_join_list= &wrap_nest->nested_join->join_list;
6721 emb_tbl_nest= wrap_nest;
6722 }
6723 }
6724
6725 TABLE_LIST *const sj_nest=
6726 TABLE_LIST::new_nested_join(thd->mem_root, "(sj-nest)",
6727 emb_tbl_nest, emb_join_list, parent_lex);
6728 if (sj_nest == NULL)
6729 DBUG_RETURN(true);
6730
6731 NESTED_JOIN *const nested_join= sj_nest->nested_join;
6732
6733 /* Nests do not participate in those 'chains', so: */
6734 /* sj_nest->next_leaf= sj_nest->next_local= sj_nest->next_global == NULL*/
6735 emb_join_list->push_back(sj_nest);
6736
6737 /*
6738 nested_join->used_tables and nested_join->not_null_tables are
6739 initialized in simplify_joins().
6740 */
6741
6742 /*
6743 2. Walk through subquery's top list and set 'embedding' to point to the
6744 sj-nest.
6745 */
6746 st_select_lex *subq_lex= subq_pred->unit->first_select();
6747 nested_join->query_block_id= subq_lex->select_number;
6748 nested_join->join_list.empty();
6749 List_iterator_fast<TABLE_LIST> li(subq_lex->top_join_list);
6750 TABLE_LIST *tl;
6751 while ((tl= li++))
6752 {
6753 tl->embedding= sj_nest;
6754 tl->join_list= &nested_join->join_list;
6755 nested_join->join_list.push_back(tl);
6756 }
6757
6758 /*
6759 Reconnect the next_leaf chain.
6760 TODO: Do we have to put subquery's tables at the end of the chain?
6761 Inserting them at the beginning would be a bit faster.
6762 NOTE: We actually insert them at the front! That's because the order is
6763 reversed in this list.
6764 */
6765 for (tl= parent_lex->leaf_tables; tl->next_leaf; tl= tl->next_leaf)
6766 {}
6767 tl->next_leaf= subq_lex->leaf_tables;
6768
6769 /*
6770 Same as above for next_local chain. This needed only for re-execution.
6771 (The next_local chain always starts with SELECT_LEX::table_list)
6772 */
6773 for (tl= parent_lex->get_table_list(); tl->next_local; tl= tl->next_local)
6774 {}
6775 tl->next_local= subq_lex->get_table_list();
6776
6777 /* A theory: no need to re-connect the next_global chain */
6778
6779 /* 3. Remove the original subquery predicate from the WHERE/ON */
6780
6781 // The subqueries were replaced for Item_int(1) earlier
6782 /*TODO: also reset the 'with_subselect' there. */
6783
6784 /* n. Adjust the parent_join->tables counter */
6785 uint table_no= parent_join->tables;
6786 /* n. Walk through child's tables and adjust table->map */
6787 for (tl= subq_lex->leaf_tables; tl; tl= tl->next_leaf, table_no++)
6788 {
6789 tl->table->tablenr= table_no;
6790 tl->table->map= ((table_map)1) << table_no;
6791 SELECT_LEX *old_sl= tl->select_lex;
6792 tl->select_lex= parent_join->select_lex;
6793 for (TABLE_LIST *emb= tl->embedding;
6794 emb && emb->select_lex == old_sl;
6795 emb= emb->embedding)
6796 emb->select_lex= parent_join->select_lex;
6797 }
6798 parent_join->tables+= subq_lex->join->tables;
6799 parent_join->primary_tables+= subq_lex->join->tables;
6800
6801 parent_lex->between_count+= subq_lex->between_count;
6802 parent_lex->cond_count+= subq_lex->cond_count;
6803 parent_lex->derived_table_count+= subq_lex->derived_table_count;
6804 parent_lex->materialized_table_count+= subq_lex->materialized_table_count;
6805 parent_lex->partitioned_table_count+= subq_lex->partitioned_table_count;
6806
6807 nested_join->sj_outer_exprs.empty();
6808 nested_join->sj_inner_exprs.empty();
6809
6810 /*
6811 @todo: Add similar conversion for subqueries other than IN.
6812 */
6813 if (subq_pred->substype() == Item_subselect::IN_SUBS)
6814 {
6815 Item_in_subselect *in_subq_pred= (Item_in_subselect *)subq_pred;
6816
6817 /* Left side of IN predicate is already resolved */
6818 DBUG_ASSERT(in_subq_pred->left_expr->fixed);
6819
6820 in_subq_pred->exec_method= Item_exists_subselect::EXEC_SEMI_JOIN;
6821 /*
6822 sj_corr_tables is supposed to contain non-trivially correlated tables,
6823 but here it is set to contain all correlated tables.
6824 @todo: Add analysis step that assigns only the set of non-trivially
6825 correlated tables to sj_corr_tables.
6826 */
6827 nested_join->sj_corr_tables= subq_pred->used_tables();
6828 /*
6829 sj_depends_on contains the set of outer tables referred in the
6830 subquery's WHERE clause as well as tables referred in the IN predicate's
6831 left-hand side.
6832 */
6833 nested_join->sj_depends_on= subq_pred->used_tables() |
6834 in_subq_pred->left_expr->used_tables();
6835 /* Put the subquery's WHERE into semi-join's condition. */
6836 sj_nest->sj_on_expr= subq_lex->where;
6837
6838 /*
6839 Create the IN-equalities and inject them into semi-join's ON condition.
6840 Additionally, for LooseScan strategy
6841 - Record the number of IN-equalities.
6842 - Create list of pointers to (oe1, ..., ieN). We'll need the list to
6843 see which of the expressions are bound and which are not (for those
6844 we'll produce a distinct stream of (ie_i1,...ie_ik).
6845
6846 (TODO: can we just create a list of pointers and hope the expressions
6847 will not substitute themselves on fix_fields()? or we need to wrap
6848 them into Item_direct_view_refs and store pointers to those. The
6849 pointers to Item_direct_view_refs are guaranteed to be stable as
6850 Item_direct_view_refs doesn't substitute itself with anything in
6851 Item_direct_view_ref::fix_fields.
6852 */
6853
6854 if (in_subq_pred->left_expr->type() == Item::SUBSELECT_ITEM)
6855 {
6856 List<Item> ref_list;
6857 uint i;
6858
6859 Item *header= subq_lex->ref_pointer_array[0];
6860 for (i= 1; i < in_subq_pred->left_expr->cols(); i++)
6861 {
6862 ref_list.push_back(subq_lex->ref_pointer_array[i]);
6863 }
6864
6865 Item_row *right_expr= new Item_row(header, ref_list);
6866
6867 nested_join->sj_outer_exprs.push_back(in_subq_pred->left_expr);
6868 nested_join->sj_inner_exprs.push_back(right_expr);
6869 Item_func_eq *item_eq=
6870 new Item_func_eq(in_subq_pred->left_expr,
6871 right_expr);
6872 if (item_eq == NULL)
6873 DBUG_RETURN(TRUE);
6874
6875 sj_nest->sj_on_expr= and_items(sj_nest->sj_on_expr, item_eq);
6876 if (sj_nest->sj_on_expr == NULL)
6877 DBUG_RETURN(TRUE);
6878 }
6879 else
6880 {
6881 for (uint i= 0; i < in_subq_pred->left_expr->cols(); i++)
6882 {
6883 nested_join->sj_outer_exprs.push_back(in_subq_pred->left_expr->
6884 element_index(i));
6885 nested_join->sj_inner_exprs.push_back(subq_lex->ref_pointer_array[i]);
6886
6887 Item_func_eq *item_eq=
6888 new Item_func_eq(in_subq_pred->left_expr->element_index(i),
6889 subq_lex->ref_pointer_array[i]);
6890 if (item_eq == NULL)
6891 DBUG_RETURN(TRUE);
6892
6893 sj_nest->sj_on_expr= and_items(sj_nest->sj_on_expr, item_eq);
6894 if (sj_nest->sj_on_expr == NULL)
6895 DBUG_RETURN(TRUE);
6896 }
6897 }
6898 /* Fix the created equality and AND */
6899
6900 Opt_trace_array sj_on_trace(&thd->opt_trace,
6901 "evaluating_constant_semijoin_conditions");
6902 sj_nest->sj_on_expr->top_level_item();
6903 if (sj_nest->sj_on_expr->fix_fields(thd, &sj_nest->sj_on_expr))
6904 DBUG_RETURN(true);
6905 }
6906
6907 /* Unlink the child select_lex: */
6908 subq_lex->master_unit()->exclude_level();
6909 parent_lex->removed_select= subq_lex;
6910 /*
6911 Update the resolver context - needed for Item_field objects that have been
6912 replaced in the item tree for this execution, but are still needed for
6913 subsequent executions.
6914 */
6915 for (st_select_lex *select= parent_lex->removed_select;
6916 select != NULL;
6917 select= select->removed_select)
6918 select->context.select_lex= parent_lex;
6919 /*
6920 Walk through sj nest's WHERE and ON expressions and call
6921 item->fix_table_changes() for all items.
6922 */
6923 sj_nest->sj_on_expr->fix_after_pullout(parent_lex, subq_lex);
6924 fix_list_after_tbl_changes(parent_lex, subq_lex,
6925 &sj_nest->nested_join->join_list);
6926
6927 //TODO fix QT_
6928 DBUG_EXECUTE("where",
6929 print_where(sj_nest->sj_on_expr,"SJ-EXPR", QT_ORDINARY););
6930
6931 if (emb_tbl_nest)
6932 {
6933 /* Inject sj_on_expr into the parent's ON condition */
6934 emb_tbl_nest->set_join_cond(and_items(emb_tbl_nest->join_cond(),
6935 sj_nest->sj_on_expr));
6936 if (emb_tbl_nest->join_cond() == NULL)
6937 DBUG_RETURN(true);
6938 emb_tbl_nest->join_cond()->top_level_item();
6939 if (!emb_tbl_nest->join_cond()->fixed &&
6940 emb_tbl_nest->join_cond()->fix_fields(parent_join->thd,
6941 emb_tbl_nest->join_cond_ref()))
6942 DBUG_RETURN(true);
6943 }
6944 else
6945 {
6946 /* Inject sj_on_expr into the parent's WHERE condition */
6947 parent_join->conds= and_items(parent_join->conds, sj_nest->sj_on_expr);
6948 if (parent_join->conds == NULL)
6949 DBUG_RETURN(true);
6950 parent_join->conds->top_level_item();
6951 if (parent_join->conds->fix_fields(parent_join->thd, &parent_join->conds))
6952 DBUG_RETURN(true);
6953 parent_join->select_lex->where= parent_join->conds;
6954 }
6955
6956 if (subq_lex->ftfunc_list->elements)
6957 {
6958 Item_func_match *ifm;
6959 List_iterator_fast<Item_func_match> li(*(subq_lex->ftfunc_list));
6960 while ((ifm= li++))
6961 parent_lex->ftfunc_list->push_front(ifm);
6962 }
6963
6964 DBUG_RETURN(false);
6965 }
6966
6967
6968 /*
6969 Convert semi-join subquery predicates into semi-join join nests
6970
6971 SYNOPSIS
6972 JOIN::flatten_subqueries()
6973
6974 DESCRIPTION
6975
6976 Convert candidate subquery predicates into semi-join join nests. This
6977 transformation is performed once in query lifetime and is irreversible.
6978
6979 Conversion of one subquery predicate
6980 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6981 We start with a join that has a semi-join subquery:
6982
6983 SELECT ...
6984 FROM ot, ...
6985 WHERE oe IN (SELECT ie FROM it1 ... itN WHERE subq_where) AND outer_where
6986
6987 and convert it into a semi-join nest:
6988
6989 SELECT ...
6990 FROM ot SEMI JOIN (it1 ... itN), ...
6991 WHERE outer_where AND subq_where AND oe=ie
6992
6993 that is, in order to do the conversion, we need to
6994
6995 * Create the "SEMI JOIN (it1 .. itN)" part and add it into the parent
6996 query's FROM structure.
6997 * Add "AND subq_where AND oe=ie" into parent query's WHERE (or ON if
6998 the subquery predicate was in an ON expression)
6999 * Remove the subquery predicate from the parent query's WHERE
7000
7001 Considerations when converting many predicates
7002 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
7003 A join may have at most MAX_TABLES tables. This may prevent us from
7004 flattening all subqueries when the total number of tables in parent and
7005 child selects exceeds MAX_TABLES. In addition, one slot is reserved per
7006 semi-join nest, in case the subquery needs to be materialized in a
7007 temporary table.
7008 We deal with this problem by flattening children's subqueries first and
7009 then using a heuristic rule to determine each subquery predicate's
7010 "priority".
7011
7012 RETURN
7013 FALSE OK
7014 TRUE Error
7015 */
7016
flatten_subqueries()7017 bool JOIN::flatten_subqueries()
7018 {
7019 Item_exists_subselect **subq;
7020 Item_exists_subselect **subq_end;
7021 bool outer_join_objection= false;
7022 Opt_trace_context * const trace= &thd->opt_trace;
7023 DBUG_ENTER("JOIN::flatten_subqueries");
7024
7025 if (sj_subselects.empty())
7026 DBUG_RETURN(FALSE);
7027
7028 /* First, convert child join's subqueries. We proceed bottom-up here */
7029 for (subq= sj_subselects.begin(), subq_end= sj_subselects.end();
7030 subq < subq_end;
7031 subq++)
7032 {
7033 /*
7034 Currently, we only support transformation of IN subqueries.
7035 */
7036 DBUG_ASSERT((*subq)->substype() == Item_subselect::IN_SUBS);
7037
7038 st_select_lex *child_select= (*subq)->unit->first_select();
7039 JOIN *child_join= child_select->join;
7040
7041 /*
7042 child_select->where contains only the WHERE predicate of the
7043 subquery itself here. We may be selecting from a VIEW, which has its
7044 own predicate. The combined predicates are available in child_join->conds,
7045 which was built by setup_conds() doing prepare_where() for all views.
7046 */
7047 child_select->where= child_join->conds;
7048
7049 if (child_join->flatten_subqueries())
7050 DBUG_RETURN(TRUE);
7051
7052 (*subq)->sj_convert_priority=
7053 (((*subq)->unit->uncacheable & UNCACHEABLE_DEPENDENT) ? MAX_TABLES : 0) +
7054 child_join->tables;
7055 }
7056
7057 //dump_TABLE_LIST_struct(select_lex, select_lex->leaf_tables);
7058 /*
7059 2. Pick which subqueries to convert:
7060 sort the subquery array
7061 - prefer correlated subqueries over uncorrelated;
7062 - prefer subqueries that have greater number of outer tables;
7063 */
7064 my_qsort(sj_subselects.begin(),
7065 sj_subselects.size(), sj_subselects.element_size(),
7066 reinterpret_cast<qsort_cmp>(subq_sj_candidate_cmp));
7067
7068 Prepared_stmt_arena_holder ps_arena_holder(thd);
7069
7070 // #tables-in-parent-query + #tables-in-subquery + sj nests <= MAX_TABLES
7071 /* Replace all subqueries to be flattened with Item_int(1) */
7072
7073 uint table_count= tables;
7074 for (subq= sj_subselects.begin(); subq < subq_end; subq++)
7075 {
7076 // Add the tables in the subquery nest plus one in case of materialization:
7077 const uint tables_added= (*subq)->unit->first_select()->join->tables + 1;
7078 (*subq)->sj_chosen= table_count + tables_added <= MAX_TABLES;
7079
7080 if (!(*subq)->sj_chosen)
7081 continue;
7082
7083 table_count+= tables_added;
7084
7085 Item **tree= ((*subq)->embedding_join_nest == NULL) ?
7086 &conds : ((*subq)->embedding_join_nest->join_cond_ref());
7087 if (replace_subcondition(this, tree, *subq, new Item_int(1), FALSE))
7088 DBUG_RETURN(TRUE); /* purecov: inspected */
7089 }
7090
7091 for (subq= sj_subselects.begin(); subq < subq_end; subq++)
7092 {
7093 if (!(*subq)->sj_chosen)
7094 continue;
7095
7096 OPT_TRACE_TRANSFORM(trace, oto0, oto1,
7097 (*subq)->unit->first_select()->select_number,
7098 "IN (SELECT)", "semijoin");
7099 oto1.add("chosen", true);
7100 if (convert_subquery_to_semijoin(this, *subq))
7101 DBUG_RETURN(TRUE);
7102 }
7103 /*
7104 3. Finalize the subqueries that we did not convert,
7105 ie. perform IN->EXISTS rewrite.
7106 */
7107 for (subq= sj_subselects.begin(); subq < subq_end; subq++)
7108 {
7109 if ((*subq)->sj_chosen)
7110 continue;
7111 {
7112 OPT_TRACE_TRANSFORM(trace, oto0, oto1,
7113 (*subq)->unit->first_select()->select_number,
7114 "IN (SELECT)", "semijoin");
7115 if (outer_join_objection)
7116 oto1.add_alnum("cause", "outer_join");
7117 oto1.add("chosen", false);
7118 }
7119 JOIN *child_join= (*subq)->unit->first_select()->join;
7120 Item_subselect::trans_res res;
7121 (*subq)->changed= 0;
7122 (*subq)->fixed= 0;
7123
7124 SELECT_LEX *save_select_lex= thd->lex->current_select;
7125 thd->lex->current_select= (*subq)->unit->first_select();
7126
7127 res= (*subq)->select_transformer(child_join);
7128
7129 thd->lex->current_select= save_select_lex;
7130
7131 if (res == Item_subselect::RES_ERROR)
7132 DBUG_RETURN(TRUE);
7133
7134 (*subq)->changed= 1;
7135 (*subq)->fixed= 1;
7136
7137 Item *substitute= (*subq)->substitution;
7138 const bool do_fix_fields= !(*subq)->substitution->fixed;
7139 const bool subquery_in_join_clause= (*subq)->embedding_join_nest != NULL;
7140
7141 Item **tree= subquery_in_join_clause ?
7142 ((*subq)->embedding_join_nest->join_cond_ref()) : &conds;
7143 if (replace_subcondition(this, tree, *subq, substitute, do_fix_fields))
7144 DBUG_RETURN(TRUE);
7145 (*subq)->substitution= NULL;
7146
7147 if (!thd->stmt_arena->is_conventional())
7148 {
7149 if (subquery_in_join_clause)
7150 {
7151 tree= &((*subq)->embedding_join_nest->prep_join_cond);
7152 /*
7153 Some precaution is needed when dealing with PS/SP:
7154 fix_prepare_info_in_table_list() sets prep_join_cond, but only for
7155 tables, not for join nest objects. This is instead populated in
7156 record_join_nest_info(), which is called after this function.
7157 The case where *tree is NULL is handled by this procedure.
7158 */
7159 }
7160 else
7161 tree= &select_lex->prep_where;
7162
7163 if (*tree && replace_subcondition(this, tree, *subq, substitute, false))
7164 DBUG_RETURN(true);
7165 }
7166 }
7167
7168 sj_subselects.clear();
7169 DBUG_RETURN(FALSE);
7170 }
7171
7172
7173 /*
7174 Remove the predicates pushed down into the subquery
7175
7176 SYNOPSIS
7177 JOIN::remove_subq_pushed_predicates()
7178 where IN Must be NULL
7179 OUT The remaining WHERE condition, or NULL
7180
7181 DESCRIPTION
7182 Given that this join will be executed using (unique|index)_subquery,
7183 without "checking NULL", remove the predicates that were pushed down
7184 into the subquery.
7185
7186 If the subquery compares scalar values, we can remove the condition that
7187 was wrapped into trig_cond (it will be checked when needed by the subquery
7188 engine)
7189
7190 If the subquery compares row values, we need to keep the wrapped
7191 equalities in the WHERE clause: when the left (outer) tuple has both NULL
7192 and non-NULL values, we'll do a full table scan and will rely on the
7193 equalities corresponding to non-NULL parts of left tuple to filter out
7194 non-matching records.
7195
7196 If '*where' is a triggered condition, or contains 'OR x IS NULL', or
7197 contains a condition coming from the original subquery's WHERE clause, or
7198 if there are more than one outer expressions, then WHERE is not of the
7199 simple form:
7200 outer_expr = inner_expr
7201 and thus this function does nothing.
7202
7203 If the index is on prefix (=> test_if_ref() is false), then the equality
7204 is needed as post-filter, so this function does nothing.
7205
7206 TODO: We can remove the equalities that will be guaranteed to be true by the
7207 fact that subquery engine will be using index lookup. This must be done only
7208 for cases where there are no conversion errors of significance, e.g. 257
7209 that is searched in a byte. But this requires homogenization of the return
7210 codes of all Field*::store() methods.
7211 */
remove_subq_pushed_predicates(Item ** where)7212 void JOIN::remove_subq_pushed_predicates(Item **where)
7213 {
7214 if (conds->type() == Item::FUNC_ITEM &&
7215 ((Item_func *)this->conds)->functype() == Item_func::EQ_FUNC &&
7216 ((Item_func *)conds)->arguments()[0]->type() == Item::REF_ITEM &&
7217 ((Item_func *)conds)->arguments()[1]->type() == Item::FIELD_ITEM &&
7218 test_if_ref (this->conds,
7219 (Item_field *)((Item_func *)conds)->arguments()[1],
7220 ((Item_func *)conds)->arguments()[0]))
7221 {
7222 *where= 0;
7223 return;
7224 }
7225 }
7226
7227
7228 /**
7229 @brief
7230 Add keys to derived tables'/views' result tables in a list
7231
7232 @param select_lex generate derived keys for select_lex's derived tables
7233
7234 @details
  This function generates keys for all derived tables/views of the select_lex
  to which this join corresponds, with the help of the
  TABLE_LIST::generate_keys function.
7238
7239 @return FALSE all keys were successfully added.
7240 @return TRUE OOM error
7241 */
7242
generate_derived_keys()7243 bool JOIN::generate_derived_keys()
7244 {
7245 DBUG_ASSERT(select_lex->materialized_table_count);
7246
7247 for (TABLE_LIST *table= select_lex->leaf_tables;
7248 table;
7249 table= table->next_leaf)
7250 {
7251 table->derived_keys_ready= TRUE;
7252 /* Process tables that aren't materialized yet. */
7253 if (table->uses_materialization() && !table->table->is_created() &&
7254 table->generate_keys())
7255 return TRUE;
7256 }
7257 return FALSE;
7258 }
7259
7260
7261 /**
7262 @brief
7263 Drop unused keys for each materialized derived table/view
7264
7265 @details
7266 For each materialized derived table/view, call TABLE::use_index to save one
7267 index chosen by the optimizer and ignore others. If no key is chosen, then all
7268 keys will be ignored.
7269 */
7270
drop_unused_derived_keys()7271 void JOIN::drop_unused_derived_keys()
7272 {
7273 DBUG_ASSERT(select_lex->materialized_table_count);
7274
7275 for (uint i= 0 ; i < tables ; i++)
7276 {
7277 JOIN_TAB *tab= join_tab + i;
7278 TABLE *table= tab->table;
7279 /*
7280 Save chosen key description if:
7281 1) it's a materialized derived table
7282 2) it's not yet instantiated
7283 3) some keys are defined for it
7284 */
7285 if (table &&
7286 table->pos_in_table_list->uses_materialization() && // (1)
7287 !table->is_created() && // (2)
7288 table->max_keys > 0) // (3)
7289 {
7290 Key_use *keyuse= tab->position->key;
7291
7292 table->use_index(keyuse ? keyuse->key : -1);
7293
7294 const bool key_is_const= keyuse && tab->const_keys.is_set(keyuse->key);
7295 tab->const_keys.clear_all();
7296 tab->keys.clear_all();
7297
7298 if (!keyuse)
7299 continue;
7300
7301 /*
7302 Update the selected "keyuse" to point to key number 0.
7303 Notice that unused keyuse entries still point to the deleted
7304 candidate keys. tab->keys (and tab->const_keys if the chosen key
7305 is constant) should reference key object no. 0 as well.
7306 */
7307 tab->keys.set_bit(0);
7308 if (key_is_const)
7309 tab->const_keys.set_bit(0);
7310
7311 const uint oldkey= keyuse->key;
7312 for (; keyuse->table == table && keyuse->key == oldkey; keyuse++)
7313 keyuse->key= 0;
7314 }
7315 }
7316 }
7317
7318
7319 /**
7320 Cache constant expressions in WHERE, HAVING, ON conditions.
7321
7322 @return False if success, True if error
7323
7324 @note This function is run after conditions have been pushed down to
7325 individual tables, so transformation is applied to JOIN_TAB::condition
7326 and not to the WHERE condition.
7327 */
7328
cache_const_exprs()7329 bool JOIN::cache_const_exprs()
7330 {
7331 /* No need in cache if all tables are constant. */
7332 DBUG_ASSERT(!plan_is_const());
7333
7334 for (uint i= const_tables; i < tables; i++)
7335 {
7336 Item *condition= join_tab[i].condition();
7337 if (condition == NULL)
7338 continue;
7339 Item *cache_item= NULL;
7340 Item **analyzer_arg= &cache_item;
7341 condition=
7342 condition->compile(&Item::cache_const_expr_analyzer,
7343 (uchar **)&analyzer_arg,
7344 &Item::cache_const_expr_transformer,
7345 (uchar *)&cache_item);
7346 if (condition == NULL)
7347 return true;
7348 if (condition != join_tab[i].condition())
7349 join_tab[i].set_condition(condition, __LINE__);
7350 }
7351 if (having)
7352 {
7353 Item *cache_item= NULL;
7354 Item **analyzer_arg= &cache_item;
7355 having=
7356 having->compile(&Item::cache_const_expr_analyzer, (uchar **)&analyzer_arg,
7357 &Item::cache_const_expr_transformer,(uchar *)&cache_item);
7358 if (having == NULL)
7359 return true;
7360 }
7361 return false;
7362 }
7363
7364
replace_item_field(const char * field_name,Item * new_item)7365 void JOIN::replace_item_field(const char* field_name, Item* new_item)
7366 {
7367 if (conds)
7368 {
7369 conds= conds->compile(&Item::item_field_by_name_analyzer,
7370 (uchar **)&field_name,
7371 &Item::item_field_by_name_transformer,
7372 (uchar *)new_item);
7373 conds->update_used_tables();
7374 }
7375
7376 List_iterator<Item> it(fields_list);
7377 Item *item;
7378 while ((item= it++))
7379 {
7380 item= item->compile(&Item::item_field_by_name_analyzer,
7381 (uchar **)&field_name,
7382 &Item::item_field_by_name_transformer,
7383 (uchar *)new_item);
7384 it.replace(item);
7385 item->update_used_tables();
7386 }
7387 }
7388
7389
7390 /**
7391 Extract a condition that can be checked after reading given table
7392
7393 @param cond Condition to analyze
7394 @param tables Tables for which "current field values" are available
7395 @param used_table Table(s) that we are extracting the condition for (may
7396 also include PSEUDO_TABLE_BITS, and may be zero)
7397 @param exclude_expensive_cond Do not push expensive conditions
7398
7399 @retval <>NULL Generated condition
7400 @retval = NULL Already checked, OR error
7401
7402 @details
7403 Extract the condition that can be checked after reading the table(s)
7404 specified in @c used_table, given that current-field values for tables
7405 specified in @c tables bitmap are available.
7406 If @c used_table is 0, extract conditions for all tables in @c tables.
7407
7408 This function can be used to extract conditions relevant for a table
7409 in a join order. Together with its caller, it will ensure that all
7410 conditions are attached to the first table in the join order where all
7411 necessary fields are available, and it will also ensure that a given
7412 condition is attached to only one table.
7413 To accomplish this, first initialize @c tables to the empty
7414 set. Then, loop over all tables in the join order, set @c used_table to
7415 the bit representing the current table, accumulate @c used_table into the
7416 @c tables set, and call this function. To ensure correct handling of
7417 const expressions and outer references, add the const table map and
7418 OUTER_REF_TABLE_BIT to @c used_table for the first table. To ensure
7419 that random expressions are evaluated for the final table, add
7420 RAND_TABLE_BIT to @c used_table for the final table.
7421
7422 The function assumes that constant, inexpensive parts of the condition
7423 have already been checked. Constant, expensive parts will be attached
7424 to the first table in the join order, provided that the above call
7425 sequence is followed.
7426
7427 The call order will ensure that conditions covering tables in @c tables
7428 minus those in @c used_table, have already been checked.
7429
7430 The function takes into account that some parts of the condition are
7431 guaranteed to be true by employed 'ref' access methods (the code that
7432 does this is located at the end, search down for "EQ_FUNC").
7433
7434 @note
7435 make_cond_for_info_schema() uses an algorithm similar to
7436 make_cond_for_table().
7437 */
7438
7439 Item *
make_cond_for_table(Item * cond,table_map tables,table_map used_table,bool exclude_expensive_cond)7440 make_cond_for_table(Item *cond, table_map tables, table_map used_table,
7441 bool exclude_expensive_cond)
7442 {
7443 return make_cond_for_table_from_pred(cond, cond, tables, used_table,
7444 exclude_expensive_cond);
7445 }
7446
7447 static Item *
make_cond_for_table_from_pred(Item * root_cond,Item * cond,table_map tables,table_map used_table,bool exclude_expensive_cond)7448 make_cond_for_table_from_pred(Item *root_cond, Item *cond,
7449 table_map tables, table_map used_table,
7450 bool exclude_expensive_cond)
7451 {
7452 /*
7453 Ignore this condition if
7454 1. We are extracting conditions for a specific table, and
7455 2. that table is not referenced by the condition, but not if
7456 3. this is a constant condition not checked at optimization time and
7457 this is the first table we are extracting conditions for.
7458 (Assuming that used_table == tables for the first table.)
7459 */
7460 if (used_table && // 1
7461 !(cond->used_tables() & used_table) && // 2
7462 !(cond->is_expensive() && used_table == tables)) // 3
7463 return NULL;
7464
7465 if (cond->type() == Item::COND_ITEM)
7466 {
7467 if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
7468 {
7469 /* Create new top level AND item */
7470 Item_cond_and *new_cond= new Item_cond_and;
7471 if (!new_cond)
7472 return NULL;
7473 List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
7474 Item *item;
7475 while ((item= li++))
7476 {
7477 Item *fix= make_cond_for_table_from_pred(root_cond, item,
7478 tables, used_table,
7479 exclude_expensive_cond);
7480 if (fix)
7481 new_cond->argument_list()->push_back(fix);
7482 }
7483 switch (new_cond->argument_list()->elements) {
7484 case 0:
7485 return NULL; // Always true
7486 case 1:
7487 return new_cond->argument_list()->head();
7488 default:
7489 if (new_cond->fix_fields(current_thd, NULL))
7490 return NULL;
7491 return new_cond;
7492 }
7493 }
7494 else
7495 { // Or list
7496 Item_cond_or *new_cond= new Item_cond_or;
7497 if (!new_cond)
7498 return NULL;
7499 List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
7500 Item *item;
7501 while ((item= li++))
7502 {
7503 Item *fix= make_cond_for_table_from_pred(root_cond, item,
7504 tables, 0L,
7505 exclude_expensive_cond);
7506 if (!fix)
7507 return NULL; // Always true
7508 new_cond->argument_list()->push_back(fix);
7509 }
7510 if (new_cond->fix_fields(current_thd, NULL))
7511 return NULL;
7512 return new_cond;
7513 }
7514 }
7515
7516 /*
7517 Omit this condition if
7518 1. It has been marked as omittable before, or
7519 2. Some tables referred by the condition are not available, or
7520 3. We are extracting conditions for all tables, the condition is
7521 considered 'expensive', and we want to delay evaluation of such
7522 conditions to the execution phase.
7523 */
7524 if (cond->marker == 3 || // 1
7525 (cond->used_tables() & ~tables) || // 2
7526 (!used_table && exclude_expensive_cond && cond->is_expensive())) // 3
7527 return NULL;
7528
7529 /*
7530 Extract this condition if
7531 1. It has already been marked as applicable, or
7532 2. It is not a <comparison predicate> (=, <, >, <=, >=, <=>)
7533 */
7534 if (cond->marker == 2 || // 1
7535 cond->eq_cmp_result() == Item::COND_OK) // 2
7536 return cond;
7537
7538 /*
7539 Remove equalities that are guaranteed to be true by use of 'ref' access
7540 method.
7541 Note that ref access implements "table1.field1 <=> table2.indexed_field2",
7542 i.e. if it passed a NULL field1, it will return NULL indexed_field2 if
7543 there are.
7544 Thus the equality "table1.field1 = table2.indexed_field2",
7545 is equivalent to "ref access AND table1.field1 IS NOT NULL"
7546 i.e. "ref access and proper setting/testing of ref->null_rejecting".
7547 Thus, we must be careful, that when we remove equalities below we also
7548 set ref->null_rejecting, and test it at execution; otherwise wrong NULL
7549 matches appear.
7550 So:
7551 - for the optimization phase, the code which is below, and the code in
7552 test_if_ref(), and in add_key_field(), must be kept in sync: if the
7553 applicability conditions in one place are relaxed, they should also be
7554 relaxed elsewhere.
7555 - for the execution phase, all possible execution methods must test
7556 ref->null_rejecting.
7557 */
7558 if (cond->type() == Item::FUNC_ITEM &&
7559 ((Item_func*) cond)->functype() == Item_func::EQ_FUNC)
7560 {
7561 Item *left_item= ((Item_func*) cond)->arguments()[0]->real_item();
7562 Item *right_item= ((Item_func*) cond)->arguments()[1]->real_item();
7563 if ((left_item->type() == Item::FIELD_ITEM &&
7564 test_if_ref(root_cond, (Item_field*) left_item, right_item)) ||
7565 (right_item->type() == Item::FIELD_ITEM &&
7566 test_if_ref(root_cond, (Item_field*) right_item, left_item)))
7567 {
7568 cond->marker= 3; // Condition can be omitted
7569 return NULL;
7570 }
7571 }
7572 cond->marker= 2; // Mark condition as applicable
7573 return cond;
7574 }
7575
7576
7577 /**
7578 Separates the predicates in a join condition and pushes them to the
7579 join step where all involved tables are available in the join prefix.
7580 ON clauses from JOIN expressions are also pushed to the most appropriate step.
7581
7582 @param join Join object where predicates are pushed.
7583
7584 @param cond Pointer to condition which may contain an arbitrary number of
7585 predicates, combined using AND, OR and XOR items.
7586 If NULL, equivalent to a predicate that returns TRUE for all
7587 row combinations.
7588
7589
7590 @retval true Found impossible WHERE clause, or out-of-memory
7591 @retval false Other
7592 */
7593
make_join_select(JOIN * join,Item * cond)7594 static bool make_join_select(JOIN *join, Item *cond)
7595 {
7596 THD *thd= join->thd;
7597 Opt_trace_context * const trace= &thd->opt_trace;
7598 DBUG_ENTER("make_join_select");
7599 {
7600 add_not_null_conds(join);
7601 /*
7602 Step #1: Extract constant condition
7603 - Extract and check the constant part of the WHERE
7604 - Extract constant parts of ON expressions from outer
7605 joins and attach them appropriately.
7606 */
7607 if (cond) /* Because of QUICK_GROUP_MIN_MAX_SELECT */
7608 { /* there may be a select without a cond. */
7609 if (join->primary_tables > 1)
7610 cond->update_used_tables(); // Tablenr may have changed
7611 if (join->plan_is_const() &&
7612 thd->lex->current_select->master_unit() ==
7613 &thd->lex->unit) // not upper level SELECT
7614 join->const_table_map|=RAND_TABLE_BIT;
7615
7616 /*
7617 Extract expressions that depend on constant tables
7618 1. Const part of the join's WHERE clause can be checked immediately
7619 and if it is not satisfied then the join has empty result
7620 2. Constant parts of outer joins' ON expressions must be attached
7621 there inside the triggers.
7622 */
7623 {
7624 Item *const_cond=
7625 make_cond_for_table(cond,
7626 join->const_table_map,
7627 (table_map) 0, 1);
7628 /* Add conditions added by add_not_null_conds(). */
7629 for (uint i= 0 ; i < join->const_tables ; i++)
7630 {
7631 if (and_conditions(&const_cond, join->join_tab[i].condition()))
7632 DBUG_RETURN(true);
7633 }
7634
7635 DBUG_EXECUTE("where",print_where(const_cond,"constants", QT_ORDINARY););
7636 for (JOIN_TAB *tab= join->join_tab+join->const_tables;
7637 tab < join->join_tab+join->tables ; tab++)
7638 {
7639 if (tab->on_expr_ref && *tab->on_expr_ref)
7640 {
7641 JOIN_TAB *cond_tab= tab->first_inner;
7642 Item *tmp= make_cond_for_table(*tab->on_expr_ref,
7643 join->const_table_map,
7644 ( table_map) 0, 0);
7645 if (!tmp)
7646 continue;
7647 tmp= new
7648 Item_func_trig_cond(tmp, &cond_tab->not_null_compl, cond_tab,
7649 Item_func_trig_cond::IS_NOT_NULL_COMPL);
7650 if (!tmp)
7651 DBUG_RETURN(true);
7652
7653 tmp->quick_fix_field();
7654 if (cond_tab->and_with_condition(tmp, __LINE__))
7655 DBUG_RETURN(true);
7656 }
7657 }
7658 if (const_cond != NULL)
7659 {
7660 const bool const_cond_is_true= const_cond->val_int() != 0;
7661 Opt_trace_object trace_const_cond(trace);
7662 trace_const_cond.add("condition_on_constant_tables", const_cond)
7663 .add("condition_value", const_cond_is_true);
7664 if (!const_cond_is_true)
7665 {
7666 DBUG_PRINT("info",("Found impossible WHERE condition"));
7667 DBUG_RETURN(1); // Impossible const condition
7668 }
7669 }
7670 }
7671 }
7672
7673 /*
7674 Step #2: Extract WHERE/ON parts
7675 */
7676 Opt_trace_object trace_wrapper(trace);
7677 Opt_trace_object
7678 trace_conditions(trace, "attaching_conditions_to_tables");
7679 trace_conditions.add("original_condition", cond);
7680 Opt_trace_array
7681 trace_attached_comp(trace, "attached_conditions_computation");
7682
7683 for (uint i=join->const_tables ; i < join->tables ; i++)
7684 {
7685 JOIN_TAB *const tab= join->join_tab + i;
7686
7687 if (!tab->position)
7688 continue;
7689 /*
7690 first_inner is the X in queries like:
7691 SELECT * FROM t1 LEFT OUTER JOIN (t2 JOIN t3) ON X
7692 */
7693 JOIN_TAB *const first_inner_tab= tab->first_inner;
7694 const table_map used_tables= tab->prefix_tables();
7695 const table_map current_map= tab->added_tables();
7696 bool use_quick_range=0;
7697 Item *tmp;
7698
7699 /// See if you need to switch to range access
7700 if (tab->type == JT_REF && can_switch_from_ref_to_range(thd, tab))
7701 {
7702 Opt_trace_object wrapper(trace);
7703 Opt_trace_object (trace, "access_type_changed").
7704 add_utf8_table(tab->table).
7705 add_utf8("index", tab->table->key_info[tab->ref.key].name).
7706 add_alnum("old_type", "ref").
7707 add_alnum("new_type", "range").
7708 add_alnum("cause", "uses_more_keyparts");
7709
7710 tab->type=JT_ALL;
7711 use_quick_range=1;
7712 tab->use_quick=QS_RANGE;
7713 tab->ref.key= -1;
7714 tab->ref.key_parts=0; // Don't use ref key.
7715 tab->position->records_read= rows2double(tab->quick->records);
7716 /*
7717 We will use join cache here : prevent sorting of the first
7718 table only and sort at the end.
7719 */
7720 if (i != join->const_tables &&
7721 join->primary_tables > join->const_tables + 1)
7722 join->full_join= true;
7723 }
7724
7725 tmp= NULL;
7726 if (cond)
7727 tmp= make_cond_for_table(cond,used_tables,current_map, 0);
7728 /* Add conditions added by add_not_null_conds(). */
7729 if (tab->condition() && and_conditions(&tmp, tab->condition()))
7730 DBUG_RETURN(true);
7731
7732
7733 if (cond && !tmp && tab->quick)
7734 { // Outer join
7735 if (tab->type != JT_ALL)
7736 {
7737 /*
7738 Don't use the quick method
7739 We come here in the case where we have 'key=constant' and
7740 the test is removed by make_cond_for_table()
7741 */
7742 delete tab->quick;
7743 tab->quick= 0;
7744 }
7745 else
7746 {
7747 /*
7748 Hack to handle the case where we only refer to a table
7749 in the ON part of an OUTER JOIN. In this case we want the code
7750 below to check if we should use 'quick' instead.
7751 */
7752 DBUG_PRINT("info", ("Item_int"));
7753 tmp= new Item_int((longlong) 1,1); // Always true
7754 }
7755
7756 }
7757 if (tmp || !cond || tab->type == JT_REF || tab->type == JT_REF_OR_NULL ||
7758 tab->type == JT_EQ_REF || first_inner_tab)
7759 {
7760 DBUG_EXECUTE("where",print_where(tmp,tab->table->alias, QT_ORDINARY););
7761 SQL_SELECT *sel= tab->select= new (thd->mem_root) SQL_SELECT;
7762 if (!sel)
7763 DBUG_RETURN(1); // End of memory
7764 sel->read_tables= sel->const_tables= join->const_table_map;
7765 /*
7766 If tab is an inner table of an outer join operation,
7767 add a match guard to the pushed down predicate.
7768 The guard will turn the predicate on only after
7769 the first match for outer tables is encountered.
7770 */
7771 if (cond && tmp)
7772 {
7773 /*
7774 Because of QUICK_GROUP_MIN_MAX_SELECT there may be a select without
7775 a cond, so neutralize the hack above.
7776 */
7777 if (!(tmp= add_found_match_trig_cond(first_inner_tab, tmp, 0)))
7778 DBUG_RETURN(true);
7779 sel->cond= tmp;
7780 tab->set_condition(tmp, __LINE__);
7781 /* Push condition to storage engine if this is enabled
7782 and the condition is not guarded */
7783 if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN) &&
7784 !first_inner_tab)
7785 {
7786 Item *push_cond=
7787 make_cond_for_table(tmp, tab->table->map, tab->table->map, 0);
7788 if (push_cond)
7789 {
7790 /* Push condition to handler */
7791 if (!tab->table->file->cond_push(push_cond))
7792 tab->table->file->pushed_cond= push_cond;
7793 }
7794 }
7795 }
7796 else
7797 {
7798 sel->cond= NULL;
7799 tab->set_condition(NULL, __LINE__);
7800 }
7801
7802 sel->head=tab->table;
7803 DBUG_EXECUTE("where",print_where(tmp,tab->table->alias, QT_ORDINARY););
7804 if (tab->quick)
7805 {
7806 /* Use quick key read if it's a constant and it's not used
7807 with key reading */
7808 if (tab->needed_reg.is_clear_all() && tab->type != JT_EQ_REF &&
7809 tab->type != JT_FT &&
7810 ((tab->type != JT_CONST && tab->type != JT_REF) ||
7811 (uint)tab->ref.key == tab->quick->index))
7812 {
7813 DBUG_ASSERT(tab->quick->is_valid());
7814 sel->quick=tab->quick; // Use value from get_quick_...
7815 sel->quick_keys.clear_all();
7816 sel->needed_reg.clear_all();
7817 }
7818 else
7819 {
7820 delete tab->quick;
7821 }
7822 tab->quick=0;
7823 }
7824 uint ref_key=(uint) sel->head->reginfo.join_tab->ref.key+1;
7825 if (i == join->const_tables && ref_key)
7826 {
7827 if (!tab->const_keys.is_clear_all() &&
7828 tab->table->reginfo.impossible_range)
7829 DBUG_RETURN(1);
7830 }
7831 else if (tab->type == JT_ALL && ! use_quick_range)
7832 {
7833 if (!tab->const_keys.is_clear_all() &&
7834 tab->table->reginfo.impossible_range)
7835 DBUG_RETURN(1); // Impossible range
7836 /*
7837 We plan to scan (table/index/range scan).
7838 Check again if we should use an index. We can use an index if:
7839
7840 1a) There is a condition that range optimizer can work on, and
7841 1b) There are non-constant conditions on one or more keys, and
7842 1c) Some of the non-constant fields may have been read
7843 already. This may be the case if this is not the first
7844 table in the join OR this is a subselect with
7845 non-constant conditions referring to an outer table
7846 (dependent subquery)
7847 or,
7848 2a) There are conditions only relying on constants
7849 2b) This is the first non-constant table
7850 2c) There is a limit of rows to read that is lower than
7851 the fanout for this table (i.e., the estimated number
7852 of rows that will be produced for this table per row
7853 combination of previous tables)
7854 2d) The query is NOT run with FOUND_ROWS() (because in that
7855 case we have to scan through all rows to count them anyway)
7856 */
7857 enum { DONT_RECHECK, NOT_FIRST_TABLE, LOW_LIMIT }
7858 recheck_reason= DONT_RECHECK;
7859
7860 if (cond && // 1a
7861 (tab->keys != tab->const_keys) && // 1b
7862 (i > 0 || // 1c
7863 (join->select_lex->master_unit()->item &&
7864 cond->used_tables() & OUTER_REF_TABLE_BIT)))
7865 recheck_reason= NOT_FIRST_TABLE;
7866 else if (!tab->const_keys.is_clear_all() && // 2a
7867 i == join->const_tables && // 2b
7868 (join->unit->select_limit_cnt <
7869 tab->position->records_read) && // 2c
7870 !(join->select_options & OPTION_FOUND_ROWS)) // 2d
7871 recheck_reason= LOW_LIMIT;
7872
7873 if (recheck_reason != DONT_RECHECK)
7874 {
7875 Opt_trace_object trace_one_table(trace);
7876 trace_one_table.add_utf8_table(tab->table);
7877 Opt_trace_object trace_table(trace, "rechecking_index_usage");
7878 if (recheck_reason == NOT_FIRST_TABLE)
7879 trace_table.add_alnum("recheck_reason", "not_first_table");
7880 else
7881 trace_table.add_alnum("recheck_reason", "low_limit").
7882 add("limit", join->unit->select_limit_cnt).
7883 add("row_estimate", tab->position->records_read);
7884
7885 /* Join with outer join condition */
7886 Item *orig_cond=sel->cond;
7887 sel->cond= and_conds(sel->cond, *tab->on_expr_ref);
7888
7889 /*
7890 We can't call sel->cond->fix_fields,
7891 as it will break tab->join_cond() if it's AND condition
7892 (fix_fields currently removes extra AND/OR levels).
7893 Yet attributes of the just built condition are not needed.
7894 Thus we call sel->cond->quick_fix_field for safety.
7895 */
7896 if (sel->cond && !sel->cond->fixed)
7897 sel->cond->quick_fix_field();
7898
7899 key_map usable_keys= tab->keys;
7900 if (tab->table->force_index)
7901 usable_keys.intersect(tab->table->keys_in_use_for_order_by);
7902
7903 ORDER::enum_order interesting_order= ORDER::ORDER_NOT_RELEVANT;
7904
7905 if (recheck_reason == LOW_LIMIT)
7906 {
7907 /*
7908 When optimizing for ORDER BY ... LIMIT, only indexes
7909 that give correct ordering are of interest. The block
7910 below removes all other indexes from usable_keys so
7911 the range optimizer (see test_quick_select() below)
7912 does not consider them.
7913 */
7914 for (uint idx= 0; idx < tab->table->s->keys; idx++)
7915 {
7916 /*
7917 No need to check if indexes that we're not allowed
7918 to use can provide required ordering.
7919 */
7920 if (!usable_keys.is_set(idx))
7921 continue;
7922
7923 const int read_direction=
7924 test_if_order_by_key(join->order, tab->table, idx);
7925 if (read_direction == 0)
7926 {
7927 // The index cannot provide required ordering
7928 usable_keys.clear_bit(idx);
7929 continue;
7930 }
7931
7932 /*
7933 Currently, only ASC ordered indexes are availabe,
7934 which means that if ordering can be achieved by
7935 reading the index in forward direction, then we have
7936 ORDER BY... ASC. Likewise, if ordering can be
7937 achieved by reading the index in backward direction,
7938 then we have ORDER BY ... DESC.
7939
7940 Furthermore, if correct order can be achieved by
7941 reading one index in either forward or backward
7942 direction, then all other applicable indexes will
7943 need to be read in the same direction (so no reason
7944 to check that read_direction is the same for all
7945 applicable indexes).
7946
7947 If DESC/mixed ordered indexes will be possible in
7948 the future, the implied connection between index
7949 read direction and ASC/DESC ordering will no longer
7950 hold.
7951 */
7952 interesting_order= (read_direction == -1 ? ORDER::ORDER_DESC :
7953 ORDER::ORDER_ASC);
7954 }
7955
7956 if (usable_keys.is_clear_all())
7957 recheck_reason= DONT_RECHECK; // No usable keys
7958
7959 /*
7960 If the current plan is to use a range access on an
7961 index that provides the order dictated by the ORDER BY
7962 clause there is no need to recheck index usage; we
7963 already know from the former call to
7964 test_quick_select() that a range scan on the chosen
7965 index is cheapest. Note that previous calls to
7966 test_quick_select() did not take order direction
7967 (ASC/DESC) into account, so in case of DESC ordering
7968 we still need to recheck.
7969 */
7970 if (sel->quick && (sel->quick->index != MAX_KEY) &&
7971 usable_keys.is_set(sel->quick->index) &&
7972 (interesting_order != ORDER::ORDER_DESC ||
7973 sel->quick->reverse_sorted()))
7974 {
7975 recheck_reason= DONT_RECHECK;
7976 }
7977 }
7978
7979 if ((recheck_reason != DONT_RECHECK) &&
7980 sel->test_quick_select(thd, usable_keys,
7981 used_tables & ~tab->table->map,
7982 (join->select_options &
7983 OPTION_FOUND_ROWS ?
7984 HA_POS_ERROR :
7985 join->unit->select_limit_cnt),
7986 false, // don't force quick range
7987 interesting_order) < 0)
7988 {
7989 /*
7990 Before reporting "Impossible WHERE" for the whole query
7991 we have to check isn't it only "impossible ON" instead
7992 */
7993 sel->cond=orig_cond;
7994 if (!*tab->on_expr_ref)
7995 DBUG_RETURN(1); // Impossible WHERE
7996 Opt_trace_object trace_without_on(trace, "without_ON_clause");
7997 if (sel->test_quick_select(thd, tab->keys,
7998 used_tables & ~tab->table->map,
7999 (join->select_options &
8000 OPTION_FOUND_ROWS ?
8001 HA_POS_ERROR :
8002 join->unit->select_limit_cnt),
8003 false, //don't force quick range
8004 ORDER::ORDER_NOT_RELEVANT) < 0)
8005 DBUG_RETURN(1); // Impossible WHERE
8006 }
8007 else
8008 sel->cond=orig_cond;
8009
8010 /* Fix for EXPLAIN */
8011 if (sel->quick)
8012 tab->position->records_read= (double)sel->quick->records;
8013 }
8014 else
8015 {
8016 sel->needed_reg=tab->needed_reg;
8017 sel->quick_keys.clear_all();
8018 }
8019 if (!sel->quick_keys.is_subset(tab->checked_keys) ||
8020 !sel->needed_reg.is_subset(tab->checked_keys))
8021 {
8022 tab->keys=sel->quick_keys;
8023 tab->keys.merge(sel->needed_reg);
8024 tab->use_quick= (!sel->needed_reg.is_clear_all() &&
8025 (sel->quick_keys.is_clear_all() ||
8026 (sel->quick &&
8027 (sel->quick->records >= 100L)))) ?
8028 QS_DYNAMIC_RANGE : QS_RANGE;
8029 sel->read_tables= used_tables & ~current_map;
8030 }
8031 if (i != join->const_tables && tab->use_quick != QS_DYNAMIC_RANGE &&
8032 !tab->first_inner)
8033 { /* Read with cache */
8034 if (cond &&
8035 (tmp=make_cond_for_table(cond,
8036 join->const_table_map |
8037 current_map,
8038 current_map, 0)))
8039 {
8040 DBUG_EXECUTE("where",print_where(tmp,"cache", QT_ORDINARY););
8041 tab->cache_select=(SQL_SELECT*)
8042 thd->memdup((uchar*) sel, sizeof(SQL_SELECT));
8043 tab->cache_select->cond=tmp;
8044 tab->cache_select->read_tables=join->const_table_map;
8045 }
8046 }
8047 }
8048 }
8049
8050 if (pushdown_on_conditions(join, tab))
8051 DBUG_RETURN(1);
8052 }
8053 trace_attached_comp.end();
8054
8055 /*
8056 In outer joins the loop above, in iteration for table #i, may push
8057 conditions to a table before #i. Thus, the processing below has to be in
8058 a separate loop:
8059 */
8060 Opt_trace_array trace_attached_summary(trace,
8061 "attached_conditions_summary");
8062 for (uint i= join->const_tables ; i < join->tables ; i++)
8063 {
8064 JOIN_TAB * const tab= &join->join_tab[i];
8065 if (!tab->table)
8066 continue;
8067 Item * const cond= tab->condition();
8068 Opt_trace_object trace_one_table(trace);
8069 trace_one_table.add_utf8_table(tab->table).
8070 add("attached", cond);
8071 if (cond &&
8072 cond->has_subquery() /* traverse only if needed */ )
8073 {
8074 /*
8075 Why we pass walk_subquery=false: imagine
8076 WHERE t1.col IN (SELECT * FROM t2
8077 WHERE t2.col IN (SELECT * FROM t3)
8078 and tab==t1. The grandchild subquery (SELECT * FROM t3) should not
8079 be marked as "in condition of t1" but as "in condition of t2", for
8080 correct calculation of the number of its executions.
8081 */
8082 int idx= tab - join->join_tab;
8083 cond->walk(&Item::inform_item_in_cond_of_tab, false,
8084 reinterpret_cast<uchar * const>(&idx));
8085 }
8086
8087 }
8088 }
8089 DBUG_RETURN(0);
8090 }
8091
8092
8093 /**
8094 Remove the following expressions from ORDER BY and GROUP BY:
8095 Constant expressions @n
8096 Expression that only uses tables that are of type EQ_REF and the reference
8097 is in the ORDER list or if all refereed tables are of the above type.
8098
8099 In the following, the X field can be removed:
8100 @code
8101 SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t1.a,t2.X
8102 SELECT * FROM t1,t2,t3 WHERE t1.a=t2.a AND t2.b=t3.b ORDER BY t1.a,t3.X
8103 @endcode
8104
8105 These can't be optimized:
8106 @code
8107 SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t2.X,t1.a
8108 SELECT * FROM t1,t2 WHERE t1.a=t2.a AND t1.b=t2.b ORDER BY t1.a,t2.c
8109 SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t2.b,t1.a
8110 @endcode
8111
8112 @param JOIN join object
8113 @param start_order clause being analyzed (ORDER BY, GROUP BY...)
8114 @param tab table
8115 @param cached_eq_ref_tables bitmap: bit Z is set if the table of map Z
8116 was already the subject of an eq_ref_table() call for the same clause; then
8117 the return value of this previous call can be found at bit Z of
8118 'eq_ref_tables'
8119 @param eq_ref_tables see above.
8120 */
8121
8122 static bool
eq_ref_table(JOIN * join,ORDER * start_order,JOIN_TAB * tab,table_map * cached_eq_ref_tables,table_map * eq_ref_tables)8123 eq_ref_table(JOIN *join, ORDER *start_order, JOIN_TAB *tab,
8124 table_map *cached_eq_ref_tables, table_map *eq_ref_tables)
8125 {
8126 /* We can skip const tables only if not an outer table */
8127 if (tab->type == JT_CONST && !tab->first_inner)
8128 return true;
8129 if (tab->type != JT_EQ_REF || tab->table->maybe_null)
8130 return false;
8131
8132 const table_map map= tab->table->map;
8133 uint found= 0;
8134
8135 for (Item **ref_item= tab->ref.items, **end= ref_item + tab->ref.key_parts ;
8136 ref_item != end ; ref_item++)
8137 {
8138 if (! (*ref_item)->const_item())
8139 { // Not a const ref
8140 ORDER *order;
8141 for (order=start_order ; order ; order=order->next)
8142 {
8143 if ((*ref_item)->eq(order->item[0],0))
8144 break;
8145 }
8146 if (order)
8147 {
8148 if (!(order->used & map))
8149 {
8150 found++;
8151 order->used|= map;
8152 }
8153 continue; // Used in ORDER BY
8154 }
8155 if (!only_eq_ref_tables(join, start_order, (*ref_item)->used_tables(),
8156 cached_eq_ref_tables, eq_ref_tables))
8157 return false;
8158 }
8159 }
8160 /* Check that there was no reference to table before sort order */
8161 for (; found && start_order ; start_order=start_order->next)
8162 {
8163 if (start_order->used & map)
8164 {
8165 found--;
8166 continue;
8167 }
8168 if (start_order->depend_map & map)
8169 return false;
8170 }
8171 return true;
8172 }
8173
8174
8175 /// @see eq_ref_table()
8176 static bool
only_eq_ref_tables(JOIN * join,ORDER * order,table_map tables,table_map * cached_eq_ref_tables,table_map * eq_ref_tables)8177 only_eq_ref_tables(JOIN *join, ORDER *order, table_map tables,
8178 table_map *cached_eq_ref_tables, table_map *eq_ref_tables)
8179 {
8180 tables&= ~PSEUDO_TABLE_BITS;
8181 for (JOIN_TAB **tab=join->map2table ; tables ; tab++, tables>>=1)
8182 {
8183 if (tables & 1)
8184 {
8185 const table_map map= (*tab)->table->map;
8186 bool is_eq_ref;
8187 if (*cached_eq_ref_tables & map) // then there exists a cached bit
8188 is_eq_ref= *eq_ref_tables & map;
8189 else
8190 {
8191 is_eq_ref= eq_ref_table(join, order, *tab,
8192 cached_eq_ref_tables, eq_ref_tables);
8193 if (is_eq_ref)
8194 *eq_ref_tables|= map;
8195 else
8196 *eq_ref_tables&= ~map;
8197 *cached_eq_ref_tables|= map; // now there exists a cached bit
8198 }
8199 if (!is_eq_ref)
8200 return false;
8201 }
8202 }
8203 return true;
8204 }
8205
8206 /**
8207 Heuristic: Switch from 'ref' to 'range' access if 'range' access can utilize
8208 more keyparts than 'ref' access. Conditions for doing switching:
8209
8210 1) 'ref' access depends on a constant, not a value read from a table earlier
8211 in the join sequence.
8212
8213 Rationale: if 'ref' depends on a value from another table, the join condition
8214 is not used to limit the rows read by 'range' access (that would require
8215 dynamic range - 'Range checked for each record'). In other words, if 'ref'
8216 depends on a value from another table, we have a query with conditions of
8217 the form
8218 this_table.idx_col1 = other_table.col AND <<- used by 'ref'
8219 this_table.idx_col1 OP <const> AND <<- used by 'range'
8220 this_table.idx_col2 OP <const> AND ... <<- used by 'range'
8221
8222 and an index on (idx_col1,idx_col2,...). But the fact that 'range' access
8223 uses more keyparts does not mean that it is more selective than 'ref' access
8224 because these access types utilize different parts of the query condition. We
8225 therefore trust the cost based choice made by best_access_path() instead of
8226 forcing a heuristic choice here.
8227
8228 2) Range access is possible, and it is less costly than table/index scan.
8229
8230 3a) 'ref' access and 'range' access uses the same index.
8231 3b) 'range' access uses more keyparts than 'ref' access
8232
8233 OR
8234
8235 4) Ref has borrowed the index estimate from range and created a cost
8236 estimate (See Optimize_table_order::find_best_ref). This will be a
8237 problem if range built it's row estimate using a larger number of key
8238 parts than ref. In such a case, shift to range access over the same
8239 index. So run the range optimizer with that index as the only choice.
8240 (Condition 5 is not relevant here since it has been tested in
8241 find_best_ref.)
8242
8243 @param thd THD To re-run range optimizer.
8244 @param tab JOIN_TAB To check the above conditions.
8245
8246 @return true Range is better than ref
8247 @return false Ref is better or switch isn't possible
8248
8249 @todo: This decision should rather be made in best_access_path()
8250 */
can_switch_from_ref_to_range(THD * thd,JOIN_TAB * tab)8251 static bool can_switch_from_ref_to_range(THD *thd, JOIN_TAB *tab)
8252 {
8253 if (!tab->ref.depend_map && // 1)
8254 tab->quick) // 2)
8255 {
8256 if ((uint) tab->ref.key == tab->quick->index && // 3a)
8257 tab->ref.key_length < tab->quick->max_used_key_length) // 3b)
8258 return true;
8259 else if (tab->dodgy_ref_cost) // 4)
8260 {
8261 int error;
8262 SQL_SELECT *select;
8263 JOIN *join= tab->join;
8264 select= make_select(tab->table, join->found_const_table_map,
8265 join->found_const_table_map,
8266 *tab->on_expr_ref ? *tab->on_expr_ref : join->conds,
8267 1, &error);
8268
8269 if (select)
8270 {
8271 Opt_trace_context * const trace= &thd->opt_trace;
8272 Opt_trace_object trace_wrapper(trace);
8273 Opt_trace_array
8274 trace_setup_cond(trace,
8275 "rerunning_range_optimizer_for_single_index");
8276
8277 key_map new_ref_key_map;
8278 new_ref_key_map.set_bit(tab->position->key->key);
8279 bool retcode= false;
8280 if (select->test_quick_select(thd, new_ref_key_map, 0,
8281 (join->select_options &
8282 OPTION_FOUND_ROWS ? HA_POS_ERROR :
8283 join->unit->select_limit_cnt),
8284 false, // don't force quick range
8285 ORDER::ORDER_NOT_RELEVANT) > 0)
8286 {
8287 delete tab->quick;
8288 tab->quick= select->quick;
8289 retcode= true;
8290 }
8291 select->quick= 0;
8292 delete select;
8293 return retcode;
8294 }
8295 }
8296 }
8297 return false;
8298 }
8299
8300 /**
8301 Check if an expression in ORDER BY or GROUP BY is a duplicate of a
8302 preceding expression.
8303
8304 @param first_order the first expression in the ORDER BY or
8305 GROUP BY clause
8306 @param possible_dup the expression that might be a duplicate of
8307 another expression preceding it the ORDER BY
8308 or GROUP BY clause
8309
8310 @returns true if possible_dup is a duplicate, false otherwise
8311 */
duplicate_order(const ORDER * first_order,const ORDER * possible_dup)8312 static bool duplicate_order(const ORDER *first_order,
8313 const ORDER *possible_dup)
8314 {
8315 const ORDER *order;
8316 for (order=first_order; order ; order=order->next)
8317 {
8318 if (order == possible_dup)
8319 {
8320 // all expressions preceding possible_dup have been checked.
8321 return false;
8322 }
8323 else
8324 {
8325 const Item *it1= order->item[0]->real_item();
8326 const Item *it2= possible_dup->item[0]->real_item();
8327
8328 if (it1->type() == Item::FIELD_ITEM &&
8329 it2->type() == Item::FIELD_ITEM &&
8330 (static_cast<const Item_field*>(it1)->field ==
8331 static_cast<const Item_field*>(it2)->field))
8332 {
8333 return true;
8334 }
8335 }
8336 }
8337 return false;
8338 }
8339
8340 /**
8341 Remove all constants and check if ORDER only contains simple
8342 expressions.
8343
8344 simple_order is set to 1 if sort_order only uses fields from head table
8345 and the head table is not a LEFT JOIN table.
8346
8347 @param join Join handler
8348 @param first_order List of SORT or GROUP order
8349 @param cond WHERE statement
8350 @param change_list Set to 1 if we should remove things from list.
8351 If this is not set, then only simple_order is
8352 calculated.
8353 @param simple_order Set to 1 if we are only using simple expressions
8354 @param clause_type "ORDER BY" etc for printing in optimizer trace
8355
8356 @return
8357 Returns new sort order
8358 */
8359
8360 static ORDER *
remove_const(JOIN * join,ORDER * first_order,Item * cond,bool change_list,bool * simple_order,const char * clause_type)8361 remove_const(JOIN *join,ORDER *first_order, Item *cond,
8362 bool change_list, bool *simple_order, const char *clause_type)
8363 {
8364 if (join->plan_is_const())
8365 return change_list ? 0 : first_order; // No need to sort
8366
8367 Opt_trace_context * const trace= &join->thd->opt_trace;
8368 Opt_trace_disable_I_S trace_disabled(trace, first_order == NULL);
8369 Opt_trace_object trace_wrapper(trace);
8370 Opt_trace_object trace_simpl(trace, "clause_processing");
8371 if (trace->is_started())
8372 {
8373 trace_simpl.add_alnum("clause", clause_type);
8374 String str;
8375 st_select_lex::print_order(&str, first_order,
8376 enum_query_type(QT_TO_SYSTEM_CHARSET |
8377 QT_SHOW_SELECT_NUMBER |
8378 QT_NO_DEFAULT_DB));
8379 trace_simpl.add_utf8("original_clause", str.ptr(), str.length());
8380 }
8381 Opt_trace_array trace_each_item(trace, "items");
8382
8383 ORDER *order,**prev_ptr;
8384 table_map first_table= join->join_tab[join->const_tables].table->map;
8385 table_map not_const_tables= ~join->const_table_map;
8386 table_map ref;
8387 // Caches to avoid repeating eq_ref_table() calls, @see eq_ref_table()
8388 table_map eq_ref_tables= 0, cached_eq_ref_tables= 0;
8389 DBUG_ENTER("remove_const");
8390
8391 prev_ptr= &first_order;
8392 *simple_order= *join->join_tab[join->const_tables].on_expr_ref ? 0 : 1;
8393
8394 /* NOTE: A variable of not_const_tables ^ first_table; breaks gcc 2.7 */
8395
8396 update_depend_map(join, first_order);
8397 for (order=first_order; order ; order=order->next)
8398 {
8399 Opt_trace_object trace_one_item(trace);
8400 trace_one_item.add("item", order->item[0]);
8401 table_map order_tables=order->item[0]->used_tables();
8402 if (order->item[0]->with_sum_func ||
8403 /*
8404 If the outer table of an outer join is const (either by itself or
8405 after applying WHERE condition), grouping on a field from such a
8406 table will be optimized away and filesort without temporary table
8407 will be used unless we prevent that now. Filesort is not fit to
8408 handle joins and the join condition is not applied. We can't detect
8409 the case without an expensive test, however, so we force temporary
8410 table for all queries containing more than one table, ROLLUP, and an
8411 outer join.
8412 */
8413 (join->primary_tables > 1 &&
8414 join->rollup.state == ROLLUP::STATE_INITED &&
8415 join->outer_join))
8416 *simple_order=0; // Must do a temp table to sort
8417 else if (!(order_tables & not_const_tables))
8418 {
8419 if (order->item[0]->has_subquery() &&
8420 !(join->select_lex->options & SELECT_DESCRIBE))
8421 {
8422 Opt_trace_array trace_subselect(trace, "subselect_evaluation");
8423 order->item[0]->val_str(&order->item[0]->str_value);
8424 }
8425 trace_one_item.add("uses_only_constant_tables", true);
8426 continue; // skip const item
8427 }
8428 else if (duplicate_order(first_order, order))
8429 {
8430 /*
8431 If 'order' is a duplicate of an expression earlier in the
8432 ORDER/GROUP BY sequence, it can be removed from the ORDER BY
8433 or GROUP BY clause.
8434 */
8435 trace_one_item.add("duplicate_item", true);
8436 continue;
8437 }
8438 else if (order->in_field_list && order->item[0]->has_subquery())
8439 /*
8440 If the order item is a subquery that is also in the field
8441 list, a temp table should be used to avoid evaluating the
8442 subquery for each row both when a) creating a sort index and
8443 b) getting the value.
8444 Example: "SELECT (SELECT ... ) as a ... GROUP BY a;"
8445 */
8446 *simple_order= false;
8447 else
8448 {
8449 if (order_tables & (RAND_TABLE_BIT | OUTER_REF_TABLE_BIT))
8450 *simple_order=0;
8451 else
8452 {
8453 if (cond && const_expression_in_where(cond,order->item[0]))
8454 {
8455 trace_one_item.add("equals_constant_in_where", true);
8456 continue;
8457 }
8458 if ((ref=order_tables & (not_const_tables ^ first_table)))
8459 {
8460 if (!(order_tables & first_table) &&
8461 only_eq_ref_tables(join, first_order, ref,
8462 &cached_eq_ref_tables, &eq_ref_tables))
8463 {
8464 trace_one_item.add("eq_ref_to_preceding_items", true);
8465 continue;
8466 }
8467 *simple_order=0; // Must do a temp table to sort
8468 }
8469 }
8470 }
8471 if (change_list)
8472 *prev_ptr= order; // use this entry
8473 prev_ptr= &order->next;
8474 }
8475 if (change_list)
8476 *prev_ptr=0;
8477 if (prev_ptr == &first_order) // Nothing to sort/group
8478 *simple_order=1;
8479 DBUG_PRINT("exit",("simple_order: %d",(int) *simple_order));
8480
8481 trace_each_item.end();
8482 trace_simpl.add("resulting_clause_is_simple", *simple_order);
8483 if (trace->is_started() && change_list)
8484 {
8485 String str;
8486 st_select_lex::print_order(&str, first_order,
8487 enum_query_type(QT_TO_SYSTEM_CHARSET |
8488 QT_SHOW_SELECT_NUMBER |
8489 QT_NO_DEFAULT_DB));
8490 trace_simpl.add_utf8("resulting_clause", str.ptr(), str.length());
8491 }
8492
8493 DBUG_RETURN(first_order);
8494 }
8495
8496
8497 /**
8498 Optimize conditions by
8499
8500 a) applying transitivity to build multiple equality predicates
8501 (MEP): if x=y and y=z the MEP x=y=z is built.
8502 b) apply constants where possible. If the value of x is known to be
8503 42, x is replaced with a constant of value 42. By transitivity, this
8504 also applies to MEPs, so the MEP in a) will become 42=x=y=z.
8505 c) remove conditions that are impossible or always true
8506
8507 @param join pointer to the structure providing all context info
8508 for the query
8509 @param conds conditions to optimize
8510 @param join_list list of join tables to which the condition
8511 refers to
8512 @param[out] cond_value Not changed if conds was empty
8513 COND_TRUE if conds is always true
8514 COND_FALSE if conds is impossible
8515 COND_OK otherwise
8516
8517 @return optimized conditions
8518 */
8519 Item *
optimize_cond(THD * thd,Item * conds,COND_EQUAL ** cond_equal,List<TABLE_LIST> * join_list,bool build_equalities,Item::cond_result * cond_value)8520 optimize_cond(THD *thd, Item *conds, COND_EQUAL **cond_equal,
8521 List<TABLE_LIST> *join_list,
8522 bool build_equalities, Item::cond_result *cond_value)
8523 {
8524 Opt_trace_context * const trace= &thd->opt_trace;
8525 DBUG_ENTER("optimize_cond");
8526
8527 if (conds)
8528 {
8529 Opt_trace_object trace_wrapper(trace);
8530 Opt_trace_object trace_cond(trace, "condition_processing");
8531 trace_cond.add_alnum("condition", build_equalities ? "WHERE" : "HAVING");
8532 trace_cond.add("original_condition", conds);
8533 Opt_trace_array trace_steps(trace, "steps");
8534
8535 /*
8536 Build all multiple equality predicates and eliminate equality
8537 predicates that can be inferred from these multiple equalities.
8538 For each reference of a field included into a multiple equality
8539 that occurs in a function set a pointer to the multiple equality
8540 predicate. Substitute a constant instead of this field if the
8541 multiple equality contains a constant.
8542 */
8543 if (build_equalities)
8544 {
8545 Opt_trace_object step_wrapper(trace);
8546 step_wrapper.add_alnum("transformation", "equality_propagation");
8547 {
8548 Opt_trace_disable_I_S
8549 disable_trace_wrapper(trace, !conds->has_subquery());
8550 Opt_trace_array
8551 trace_subselect(trace, "subselect_evaluation");
8552 conds= build_equal_items(thd, conds, NULL, true,
8553 join_list, cond_equal);
8554 }
8555 step_wrapper.add("resulting_condition", conds);
8556 }
8557
8558 /* change field = field to field = const for each found field = const */
8559 {
8560 Opt_trace_object step_wrapper(trace);
8561 step_wrapper.add_alnum("transformation", "constant_propagation");
8562 {
8563 Opt_trace_disable_I_S
8564 disable_trace_wrapper(trace, !conds->has_subquery());
8565 Opt_trace_array
8566 trace_subselect(trace, "subselect_evaluation");
8567 propagate_cond_constants(thd, (I_List<COND_CMP> *) 0, conds, conds);
8568 }
8569 step_wrapper.add("resulting_condition", conds);
8570 }
8571
8572 /*
8573 Remove all instances of item == item
8574 Remove all and-levels where CONST item != CONST item
8575 */
8576 DBUG_EXECUTE("where",print_where(conds,"after const change", QT_ORDINARY););
8577 {
8578 Opt_trace_object step_wrapper(trace);
8579 step_wrapper.add_alnum("transformation", "trivial_condition_removal");
8580 {
8581 Opt_trace_disable_I_S
8582 disable_trace_wrapper(trace, !conds->has_subquery());
8583 Opt_trace_array trace_subselect(trace, "subselect_evaluation");
8584 conds= remove_eq_conds(thd, conds, cond_value) ;
8585 }
8586 step_wrapper.add("resulting_condition", conds);
8587 }
8588 }
8589 DBUG_RETURN(conds);
8590 }
8591
8592
8593 /**
8594 Handles the reqursive job for remove_eq_conds()
8595
8596 Remove const and eq items. Return new item, or NULL if no condition
8597 cond_value is set to according:
8598 COND_OK query is possible (field = constant)
8599 COND_TRUE always true ( 1 = 1 )
8600 COND_FALSE always false ( 1 = 2 )
8601
8602 SYNPOSIS
8603 remove_eq_conds()
8604 thd THD environment
8605 cond the condition to handle. Note that cond
8606 is changed by this function
8607 cond_value the resulting value of the condition
8608
8609 RETURN
8610 *Item with the simplified condition
8611 */
8612
8613 static Item *
internal_remove_eq_conds(THD * thd,Item * cond,Item::cond_result * cond_value)8614 internal_remove_eq_conds(THD *thd, Item *cond, Item::cond_result *cond_value)
8615 {
8616 if (cond->type() == Item::COND_ITEM)
8617 {
8618 bool and_level= ((Item_cond*) cond)->functype()
8619 == Item_func::COND_AND_FUNC;
8620 List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
8621 Item::cond_result tmp_cond_value;
8622 bool should_fix_fields=0;
8623
8624 *cond_value=Item::COND_UNDEF;
8625 Item *item;
8626 while ((item=li++))
8627 {
8628 Item *new_item=internal_remove_eq_conds(thd, item, &tmp_cond_value);
8629 if (!new_item)
8630 li.remove();
8631 else if (item != new_item)
8632 {
8633 (void) li.replace(new_item);
8634 should_fix_fields=1;
8635 }
8636 if (*cond_value == Item::COND_UNDEF)
8637 *cond_value=tmp_cond_value;
8638 switch (tmp_cond_value) {
8639 case Item::COND_OK: // Not TRUE or FALSE
8640 if (and_level || *cond_value == Item::COND_FALSE)
8641 *cond_value=tmp_cond_value;
8642 break;
8643 case Item::COND_FALSE:
8644 if (and_level)
8645 {
8646 *cond_value=tmp_cond_value;
8647 return (Item*) 0; // Always false
8648 }
8649 break;
8650 case Item::COND_TRUE:
8651 if (!and_level)
8652 {
8653 *cond_value= tmp_cond_value;
8654 return (Item*) 0; // Always true
8655 }
8656 break;
8657 case Item::COND_UNDEF: // Impossible
8658 break; /* purecov: deadcode */
8659 }
8660 }
8661 if (should_fix_fields)
8662 cond->update_used_tables();
8663
8664 if (!((Item_cond*) cond)->argument_list()->elements ||
8665 *cond_value != Item::COND_OK)
8666 return (Item*) 0;
8667 if (((Item_cond*) cond)->argument_list()->elements == 1)
8668 {
8669 /*
8670 BUG#11765699:
8671 We're dealing with an AND or OR item that has only one
8672 argument. However, it is not an option to empty the list
8673 because:
8674
8675 - this function is called for either JOIN::conds or
8676 JOIN::having, but these point to the same condition as
8677 SELECT_LEX::where and SELECT_LEX::having do.
8678
8679 - The return value of remove_eq_conds() is assigned to
8680 JOIN::conds and JOIN::having, so emptying the list and
8681 returning the only remaining item "replaces" the AND or OR
8682 with item for the variables in JOIN. However, the return
8683 value is not assigned to the SELECT_LEX counterparts. Thus,
8684 if argument_list is emptied, SELECT_LEX forgets the item in
8685 argument_list()->head().
8686
8687 item is therefore returned, but argument_list is not emptied.
8688 */
8689 item= ((Item_cond*) cond)->argument_list()->head();
8690 /*
8691 Consider reenabling the line below when the optimizer has been
8692 split into properly separated phases.
8693
8694 ((Item_cond*) cond)->argument_list()->empty();
8695 */
8696 return item;
8697 }
8698 }
8699 else if (cond->type() == Item::FUNC_ITEM &&
8700 ((Item_func*) cond)->functype() == Item_func::ISNULL_FUNC)
8701 {
8702 Item_func_isnull *func=(Item_func_isnull*) cond;
8703 Item **args= func->arguments();
8704 if (args[0]->type() == Item::FIELD_ITEM)
8705 {
8706 Field *field=((Item_field*) args[0])->field;
8707 /* fix to replace 'NULL' dates with '0' (shreeve@uci.edu) */
8708 /*
8709 See BUG#12594011
8710 Documentation says that
8711 SELECT datetime_notnull d FROM t1 WHERE d IS NULL
8712 shall return rows where d=='0000-00-00'
8713
8714 Thus, for DATE and DATETIME columns defined as NOT NULL,
8715 "date_notnull IS NULL" has to be modified to
8716 "date_notnull IS NULL OR date_notnull == 0" (if outer join)
8717 "date_notnull == 0" (otherwise)
8718
8719 */
8720 if (((field->type() == MYSQL_TYPE_DATE) ||
8721 (field->type() == MYSQL_TYPE_DATETIME)) &&
8722 (field->flags & NOT_NULL_FLAG))
8723 {
8724 Item *item0= new(thd->mem_root) Item_int((longlong)0, 1);
8725 Item *eq_cond= new(thd->mem_root) Item_func_eq(args[0], item0);
8726 if (!eq_cond)
8727 return cond;
8728
8729 if (args[0]->is_outer_field())
8730 {
8731 // outer join: transform "col IS NULL" to "col IS NULL or col=0"
8732 Item *or_cond= new(thd->mem_root) Item_cond_or(eq_cond, cond);
8733 if (!or_cond)
8734 return cond;
8735 cond= or_cond;
8736 }
8737 else
8738 {
8739 // not outer join: transform "col IS NULL" to "col=0"
8740 cond= eq_cond;
8741 }
8742
8743 cond->fix_fields(thd, &cond);
8744 }
8745 }
8746 if (cond->const_item())
8747 {
8748 *cond_value= eval_const_cond(cond) ? Item::COND_TRUE : Item::COND_FALSE;
8749 return (Item*) 0;
8750 }
8751 }
8752 else if (cond->const_item() && !cond->is_expensive())
8753 {
8754 *cond_value= eval_const_cond(cond) ? Item::COND_TRUE : Item::COND_FALSE;
8755 return (Item*) 0;
8756 }
8757 else if ((*cond_value= cond->eq_cmp_result()) != Item::COND_OK)
8758 { // boolan compare function
8759 Item *left_item= ((Item_func*) cond)->arguments()[0];
8760 Item *right_item= ((Item_func*) cond)->arguments()[1];
8761 if (left_item->eq(right_item,1))
8762 {
8763 if (!left_item->maybe_null ||
8764 ((Item_func*) cond)->functype() == Item_func::EQUAL_FUNC)
8765 return (Item*) 0; // Compare of identical items
8766 }
8767 }
8768 *cond_value=Item::COND_OK;
8769 return cond; // Point at next and level
8770 }
8771
8772
8773 /**
8774 Remove const and eq items. Return new item, or NULL if no condition
8775 cond_value is set to according:
8776 COND_OK query is possible (field = constant)
8777 COND_TRUE always true ( 1 = 1 )
8778 COND_FALSE always false ( 1 = 2 )
8779
8780 SYNPOSIS
8781 remove_eq_conds()
8782 thd THD environment
8783 cond the condition to handle
8784 cond_value the resulting value of the condition
8785
8786 NOTES
8787 calls the inner_remove_eq_conds to check all the tree reqursively
8788
8789 RETURN
8790 *Item with the simplified condition
8791 */
8792
8793 Item *
remove_eq_conds(THD * thd,Item * cond,Item::cond_result * cond_value)8794 remove_eq_conds(THD *thd, Item *cond, Item::cond_result *cond_value)
8795 {
8796 if (cond->type() == Item::FUNC_ITEM &&
8797 ((Item_func*) cond)->functype() == Item_func::ISNULL_FUNC)
8798 {
8799 /*
8800 Handles this special case for some ODBC applications:
8801 The are requesting the row that was just updated with a auto_increment
8802 value with this construct:
8803
8804 SELECT * from table_name where auto_increment_column IS NULL
8805 This will be changed to:
8806 SELECT * from table_name where auto_increment_column = LAST_INSERT_ID
8807 */
8808
8809 Item_func_isnull *func=(Item_func_isnull*) cond;
8810 Item **args= func->arguments();
8811 if (args[0]->type() == Item::FIELD_ITEM)
8812 {
8813 Field *field=((Item_field*) args[0])->field;
8814 if (field->flags & AUTO_INCREMENT_FLAG && !field->table->maybe_null &&
8815 (thd->variables.option_bits & OPTION_AUTO_IS_NULL) &&
8816 (thd->first_successful_insert_id_in_prev_stmt > 0 &&
8817 thd->substitute_null_with_insert_id))
8818 {
8819 #ifdef HAVE_QUERY_CACHE
8820 query_cache_abort(&thd->query_cache_tls);
8821 #endif
8822 Item *new_cond;
8823 if ((new_cond= new Item_func_eq(args[0],
8824 new Item_int(NAME_STRING("last_insert_id()"),
8825 thd->read_first_successful_insert_id_in_prev_stmt(),
8826 MY_INT64_NUM_DECIMAL_DIGITS))))
8827 {
8828 cond=new_cond;
8829 /*
8830 Item_func_eq can't be fixed after creation so we do not check
8831 cond->fixed, also it do not need tables so we use 0 as second
8832 argument.
8833 */
8834 cond->fix_fields(thd, &cond);
8835 }
8836 /*
8837 IS NULL should be mapped to LAST_INSERT_ID only for first row, so
8838 clear for next row
8839 */
8840 thd->substitute_null_with_insert_id= FALSE;
8841
8842 *cond_value= Item::COND_OK;
8843 return cond;
8844 }
8845 }
8846 }
8847 return internal_remove_eq_conds(thd, cond, cond_value); // Scan all the condition
8848 }
8849
8850
8851 /**
8852 Check if GROUP BY/DISTINCT can be optimized away because the set is
8853 already known to be distinct.
8854
8855 Used in removing the GROUP BY/DISTINCT of the following types of
8856 statements:
8857 @code
8858 SELECT [DISTINCT] <unique_key_cols>... FROM <single_table_ref>
8859 [GROUP BY <unique_key_cols>,...]
8860 @endcode
8861
8862 If (a,b,c is distinct)
8863 then <any combination of a,b,c>,{whatever} is also distinct
8864
8865 This function checks if all the key parts of any of the unique keys
8866 of the table are referenced by a list : either the select list
8867 through find_field_in_item_list or GROUP BY list through
8868 find_field_in_order_list.
8869 If the above holds and the key parts cannot contain NULLs then we
8870 can safely remove the GROUP BY/DISTINCT,
8871 as no result set can be more distinct than an unique key.
8872
8873 @param tab The join table to operate on.
8874 @param find_func function to iterate over the list and search
8875 for a field
8876
8877 @retval
8878 1 found
8879 @retval
8880 0 not found.
8881
8882 @note
8883 The function assumes that make_outerjoin_info() has been called in
8884 order for the check for outer tables to work.
8885 */
8886
8887 static bool
list_contains_unique_index(JOIN_TAB * tab,bool (* find_func)(Field *,void *),void * data)8888 list_contains_unique_index(JOIN_TAB *tab,
8889 bool (*find_func) (Field *, void *), void *data)
8890 {
8891 TABLE *table= tab->table;
8892
8893 if (tab->is_inner_table_of_outer_join())
8894 return 0;
8895 for (uint keynr= 0; keynr < table->s->keys; keynr++)
8896 {
8897 if (keynr == table->s->primary_key ||
8898 (table->key_info[keynr].flags & HA_NOSAME))
8899 {
8900 KEY *keyinfo= table->key_info + keynr;
8901 KEY_PART_INFO *key_part, *key_part_end;
8902
8903 for (key_part=keyinfo->key_part,
8904 key_part_end=key_part+ keyinfo->user_defined_key_parts;
8905 key_part < key_part_end;
8906 key_part++)
8907 {
8908 if (key_part->field->real_maybe_null() ||
8909 !find_func(key_part->field, data))
8910 break;
8911 }
8912 if (key_part == key_part_end)
8913 return 1;
8914 }
8915 }
8916 return 0;
8917 }
8918
8919
8920 /**
8921 Helper function for list_contains_unique_index.
8922 Find a field reference in a list of ORDER structures.
8923 Finds a direct reference of the Field in the list.
8924
8925 @param field The field to search for.
8926 @param data ORDER *.The list to search in
8927
8928 @retval
8929 1 found
8930 @retval
8931 0 not found.
8932 */
8933
8934 static bool
find_field_in_order_list(Field * field,void * data)8935 find_field_in_order_list (Field *field, void *data)
8936 {
8937 ORDER *group= (ORDER *) data;
8938 bool part_found= 0;
8939 for (ORDER *tmp_group= group; tmp_group; tmp_group=tmp_group->next)
8940 {
8941 Item *item= (*tmp_group->item)->real_item();
8942 if (item->type() == Item::FIELD_ITEM &&
8943 ((Item_field*) item)->field->eq(field))
8944 {
8945 part_found= 1;
8946 break;
8947 }
8948 }
8949 return part_found;
8950 }
8951
8952
8953 /**
8954 Helper function for list_contains_unique_index.
8955 Find a field reference in a dynamic list of Items.
8956 Finds a direct reference of the Field in the list.
8957
8958 @param[in] field The field to search for.
8959 @param[in] data List<Item> *.The list to search in
8960
8961 @retval
8962 1 found
8963 @retval
8964 0 not found.
8965 */
8966
8967 static bool
find_field_in_item_list(Field * field,void * data)8968 find_field_in_item_list (Field *field, void *data)
8969 {
8970 List<Item> *fields= (List<Item> *) data;
8971 bool part_found= 0;
8972 List_iterator<Item> li(*fields);
8973 Item *item;
8974
8975 while ((item= li++))
8976 {
8977 if (item->type() == Item::FIELD_ITEM &&
8978 ((Item_field*) item)->field->eq(field))
8979 {
8980 part_found= 1;
8981 break;
8982 }
8983 }
8984 return part_found;
8985 }
8986
8987
8988 /**
8989 Create a group by that consist of all non const fields.
8990
8991 Try to use the fields in the order given by 'order' to allow one to
8992 optimize away 'order by'.
8993 */
8994
8995 static ORDER *
create_distinct_group(THD * thd,Ref_ptr_array ref_pointer_array,ORDER * order_list,List<Item> & fields,List<Item> & all_fields,bool * all_order_by_fields_used)8996 create_distinct_group(THD *thd, Ref_ptr_array ref_pointer_array,
8997 ORDER *order_list, List<Item> &fields,
8998 List<Item> &all_fields,
8999 bool *all_order_by_fields_used)
9000 {
9001 List_iterator<Item> li(fields);
9002 Item *item;
9003 Ref_ptr_array orig_ref_pointer_array= ref_pointer_array;
9004 ORDER *order,*group,**prev;
9005
9006 *all_order_by_fields_used= 1;
9007 while ((item=li++))
9008 item->marker=0; /* Marker that field is not used */
9009
9010 prev= &group; group=0;
9011 for (order=order_list ; order; order=order->next)
9012 {
9013 if (order->in_field_list)
9014 {
9015 ORDER *ord=(ORDER*) thd->memdup((char*) order,sizeof(ORDER));
9016 if (!ord)
9017 return 0;
9018 *prev=ord;
9019 prev= &ord->next;
9020 (*ord->item)->marker=1;
9021 }
9022 else
9023 *all_order_by_fields_used= 0;
9024 }
9025
9026 li.rewind();
9027 while ((item=li++))
9028 {
9029 if (!item->const_item() && !item->with_sum_func && !item->marker)
9030 {
9031 /*
9032 Don't put duplicate columns from the SELECT list into the
9033 GROUP BY list.
9034 */
9035 ORDER *ord_iter;
9036 for (ord_iter= group; ord_iter; ord_iter= ord_iter->next)
9037 if ((*ord_iter->item)->eq(item, 1))
9038 goto next_item;
9039
9040 ORDER *ord=(ORDER*) thd->calloc(sizeof(ORDER));
9041 if (!ord)
9042 return 0;
9043
9044 if (item->type() == Item::FIELD_ITEM &&
9045 item->field_type() == MYSQL_TYPE_BIT)
9046 {
9047 /*
9048 Because HEAP tables can't index BIT fields we need to use an
9049 additional hidden field for grouping because later it will be
9050 converted to a LONG field. Original field will remain of the
9051 BIT type and will be returned to a client.
9052 @note setup_ref_array() needs to account for the extra space.
9053 */
9054 Item_field *new_item= new Item_field(thd, (Item_field*)item);
9055 int el= all_fields.elements;
9056 orig_ref_pointer_array[el]= new_item;
9057 all_fields.push_front(new_item);
9058 ord->item= &orig_ref_pointer_array[el];
9059 }
9060 else
9061 {
9062 /*
9063 We have here only field_list (not all_field_list), so we can use
9064 simple indexing of ref_pointer_array (order in the array and in the
9065 list are same)
9066 */
9067 ord->item= &ref_pointer_array[0];
9068 }
9069 ord->direction= ORDER::ORDER_ASC;
9070 *prev=ord;
9071 prev= &ord->next;
9072 }
9073 next_item:
9074 ref_pointer_array.pop_front();
9075 }
9076 *prev=0;
9077 return group;
9078 }
9079
9080
9081 /**
9082 Return table number if there is only one table in sort order
9083 and group and order is compatible, else return 0.
9084 */
9085
9086 static TABLE *
get_sort_by_table(ORDER * a,ORDER * b,TABLE_LIST * tables)9087 get_sort_by_table(ORDER *a,ORDER *b,TABLE_LIST *tables)
9088 {
9089 table_map map= (table_map) 0;
9090 DBUG_ENTER("get_sort_by_table");
9091
9092 if (!a)
9093 a=b; // Only one need to be given
9094 else if (!b)
9095 b=a;
9096
9097 for (; a && b; a=a->next,b=b->next)
9098 {
9099 if (!(*a->item)->eq(*b->item,1))
9100 DBUG_RETURN(0);
9101 map|=a->item[0]->used_tables();
9102 }
9103 map&= ~PARAM_TABLE_BIT;
9104 if (!map || (map & (RAND_TABLE_BIT | OUTER_REF_TABLE_BIT)))
9105 DBUG_RETURN(0);
9106
9107 for (; !(map & tables->table->map); tables= tables->next_leaf) ;
9108 if (map != tables->table->map)
9109 DBUG_RETURN(0); // More than one table
9110 DBUG_PRINT("exit",("sort by table: %d",tables->table->tablenr));
9111 DBUG_RETURN(tables->table);
9112 }
9113
9114
9115 /**
9116 Create a condition for a const reference for a table.
9117
9118 @param thd THD pointer
9119 @param join_tab pointer to the table
9120
9121 @return A pointer to the created condition for the const reference.
9122 @retval !NULL if the condition was created successfully
9123 @retval NULL if an error has occured
9124 */
9125
create_cond_for_const_ref(THD * thd,JOIN_TAB * join_tab)9126 static Item_cond_and *create_cond_for_const_ref(THD *thd, JOIN_TAB *join_tab)
9127 {
9128 DBUG_ENTER("create_cond_for_const_ref");
9129 DBUG_ASSERT(join_tab->ref.key_parts);
9130
9131 TABLE *table= join_tab->table;
9132 Item_cond_and *cond= new Item_cond_and();
9133 if (!cond)
9134 DBUG_RETURN(NULL);
9135
9136 for (uint i=0 ; i < join_tab->ref.key_parts ; i++)
9137 {
9138 Field *field= table->field[table->key_info[join_tab->ref.key].key_part[i].
9139 fieldnr-1];
9140 Item *value= join_tab->ref.items[i];
9141 Item *item= new Item_field(field);
9142 if (!item)
9143 DBUG_RETURN(NULL);
9144 item= join_tab->ref.null_rejecting & ((key_part_map)1 << i) ?
9145 (Item *)new Item_func_eq(item, value) :
9146 (Item *)new Item_func_equal(item, value);
9147 if (!item)
9148 DBUG_RETURN(NULL);
9149 if (cond->add(item))
9150 DBUG_RETURN(NULL);
9151 }
9152 cond->fix_fields(thd, (Item**)&cond);
9153
9154 DBUG_RETURN(cond);
9155 }
9156
9157 /**
9158 Create a condition for a const reference and add this to the
9159 currenct select for the table.
9160 */
9161
add_ref_to_table_cond(THD * thd,JOIN_TAB * join_tab)9162 static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab)
9163 {
9164 DBUG_ENTER("add_ref_to_table_cond");
9165 if (!join_tab->ref.key_parts)
9166 DBUG_RETURN(FALSE);
9167
9168 int error= 0;
9169
9170 /* Create a condition representing the const reference. */
9171 Item_cond_and *cond= create_cond_for_const_ref(thd, join_tab);
9172 if (!cond)
9173 DBUG_RETURN(TRUE);
9174
9175 /* Add this condition to the existing select condtion */
9176 if (join_tab->select)
9177 {
9178 if (join_tab->select->cond)
9179 {
9180 error=(int) cond->add(join_tab->select->cond);
9181 cond->update_used_tables();
9182 }
9183 join_tab->set_jt_and_sel_condition(cond, __LINE__);
9184 }
9185 else if ((join_tab->select= make_select(join_tab->table, 0, 0, cond, 0,
9186 &error)))
9187 join_tab->set_condition(cond, __LINE__);
9188
9189 if (join_tab->select)
9190 Opt_trace_object(&thd->opt_trace).add("added_back_ref_condition", cond);
9191 /*
9192 If we have pushed parts of the select condition down to the
9193 storage engine we also need to add the condition for the const
9194 reference to the pre_idx_push_cond since this might be used
9195 later (in test_if_skip_sort_order()) instead of the condition.
9196 */
9197 if (join_tab->pre_idx_push_cond)
9198 {
9199 cond= create_cond_for_const_ref(thd, join_tab);
9200 if (!cond)
9201 DBUG_RETURN(TRUE);
9202 if (cond->add(join_tab->pre_idx_push_cond))
9203 DBUG_RETURN(TRUE);
9204 join_tab->pre_idx_push_cond = cond;
9205 }
9206
9207 DBUG_RETURN(error ? TRUE : FALSE);
9208 }
9209
9210
9211 /**
9212 Remove additional condition inserted by IN/ALL/ANY transformation.
9213
9214 @param conds condition for processing
9215
9216 @return
9217 new conditions
9218
9219 @note that this function has Bug#13915291.
9220 */
9221
remove_additional_cond(Item * conds)9222 static Item *remove_additional_cond(Item* conds)
9223 {
9224 // Because it uses in_additional_cond it applies only to the scalar case.
9225 if (conds->item_name.ptr() == in_additional_cond)
9226 return 0;
9227 if (conds->type() == Item::COND_ITEM)
9228 {
9229 Item_cond *cnd= (Item_cond*) conds;
9230 List_iterator<Item> li(*(cnd->argument_list()));
9231 Item *item;
9232 while ((item= li++))
9233 {
9234 if (item->item_name.ptr() == in_additional_cond)
9235 {
9236 li.remove();
9237 if (cnd->argument_list()->elements == 1)
9238 return cnd->argument_list()->head();
9239 return conds;
9240 }
9241 }
9242 }
9243 return conds;
9244 }
9245
9246
9247 /*
9248 Index lookup-based subquery: save some flags for EXPLAIN output
9249
9250 SYNOPSIS
9251 save_index_subquery_explain_info()
9252 join_tab Subquery's join tab (there is only one as index lookup is
9253 only used for subqueries that are single-table SELECTs)
9254 where Subquery's WHERE clause
9255
9256 DESCRIPTION
9257 For index lookup-based subquery (subselect_indexsubquery_engine),
9258 check its EXPLAIN output row should contain
9259 "Using index" (TAB_INFO_FULL_SCAN_ON_NULL)
9260 "Using Where" (TAB_INFO_USING_WHERE)
9261 "Full scan on NULL key" (TAB_INFO_FULL_SCAN_ON_NULL)
9262 and set appropriate flags in join_tab->packed_info.
9263
9264 TODO:
9265 packed_info causes duplication in EXPLAIN code. For example, we print
9266 "using where" in 2 places of EXPLAIN code: if tab->condition(), OR if
9267 'packed_info & TAB_INFO_USING_WHERE'.
9268 indexsubquery_engine is the only user of
9269 save_index_subquery_explain_info().
9270 packed_info is almost useless today, it would be good to get rid of it
9271 (and thus of save_index_subquery_explain_info()).
9272 */
9273
save_index_subquery_explain_info(JOIN_TAB * join_tab,Item * where)9274 static void save_index_subquery_explain_info(JOIN_TAB *join_tab, Item* where)
9275 {
9276 join_tab->packed_info= TAB_INFO_HAVE_VALUE;
9277
9278 /*
9279 This is actually not needed, 'non-packed-info' branch of EXPLAIN naturally
9280 reads covering_keys and produces the desired 'Using index'
9281 */
9282 if (join_tab->table->covering_keys.is_set(join_tab->ref.key))
9283 join_tab->packed_info |= TAB_INFO_USING_INDEX;
9284
9285 /*
9286 This is needed, because 'where' (==join->conds) may be NULL, or
9287 shorter than select->cond/tab->condition(), due to
9288 remove_subq_pushed_predicates() and remove_additional_cond(); the real
9289 condition which will be checked for each row is
9290 indexsubquery_engine::cond (==join->conds).
9291 Still this should be solvable without TAB_INFO_USING_WHERE.
9292 */
9293 if (where)
9294 join_tab->packed_info |= TAB_INFO_USING_WHERE;
9295
9296 /*
9297 This is actually not needed, 'non-packed-info' branch of EXPLAIN naturally
9298 reads has_guarded_conds() and produces the desired 'Full scan on NULL
9299 key'.
9300 */
9301 if (join_tab->has_guarded_conds())
9302 join_tab->packed_info|= TAB_INFO_FULL_SCAN_ON_NULL;
9303 }
9304
9305
9306 /**
9307 Update some values in keyuse for faster choose_table_order() loop.
9308 */
9309
optimize_keyuse(JOIN * join,Key_use_array * keyuse_array)9310 static void optimize_keyuse(JOIN *join, Key_use_array *keyuse_array)
9311 {
9312 for (size_t ix= 0; ix < keyuse_array->size(); ++ix)
9313 {
9314 Key_use *keyuse= &keyuse_array->at(ix);
9315 table_map map;
9316 /*
9317 If we find a ref, assume this table matches a proportional
9318 part of this table.
9319 For example 100 records matching a table with 5000 records
9320 gives 5000/100 = 50 records per key
9321 Constant tables are ignored.
9322 To avoid bad matches, we don't make ref_table_rows less than 100.
9323 */
9324 keyuse->ref_table_rows= ~(ha_rows) 0; // If no ref
9325 if (keyuse->used_tables &
9326 (map= (keyuse->used_tables & ~join->const_table_map &
9327 ~OUTER_REF_TABLE_BIT)))
9328 {
9329 uint tablenr;
9330 for (tablenr=0 ; ! (map & 1) ; map>>=1, tablenr++) ;
9331 if (map == 1) // Only one table
9332 {
9333 TABLE *tmp_table= join->join_tab[tablenr].table;
9334 keyuse->ref_table_rows= max<ha_rows>(tmp_table->file->stats.records, 100);
9335 }
9336 }
9337 /*
9338 Outer reference (external field) is constant for single executing
9339 of subquery
9340 */
9341 if (keyuse->used_tables == OUTER_REF_TABLE_BIT)
9342 keyuse->ref_table_rows= 1;
9343 }
9344 }
9345
9346
optimize_fts_query()9347 void JOIN::optimize_fts_query()
9348 {
9349 if (primary_tables > 1)
9350 return; // We only optimize single table FTS queries
9351
9352 JOIN_TAB * const tab= &(join_tab[0]);
9353 if (tab->type != JT_FT)
9354 return; // Access is not using FTS result
9355
9356 if ((tab->table->file->ha_table_flags() & HA_CAN_FULLTEXT_EXT) == 0)
9357 return; // Optimizations requires extended FTS support by table engine
9358
9359 Item_func_match* fts_result= static_cast<Item_func_match*>(tab->keyuse->val);
9360
9361 /* If we are ordering on the rank of the same result as is used for access,
9362 and the table engine deliver result ordered by rank, we can drop ordering.
9363 */
9364 if (order != NULL
9365 && order->next == NULL &&
9366 order->direction == ORDER::ORDER_DESC &&
9367 fts_result->eq(*(order->item), true))
9368 {
9369 Item_func_match* fts_item=
9370 static_cast<Item_func_match*>(*(order->item));
9371
9372 /* If we applied the LIMIT optimization @see optimize_fts_limit_query,
9373 check that the number of matching rows is sufficient.
9374 Otherwise, revert this optimization and use table scan instead.
9375 */
9376 if (min_ft_matches != HA_POS_ERROR &&
9377 min_ft_matches > fts_item->get_count())
9378 {
9379 // revert to table scan, do things make_join_readinfo would have done
9380 tab->type= JT_ALL;
9381 tab->read_first_record= join_init_read_record;
9382 tab->use_quick= QS_NONE;
9383 tab->ref.key= -1;
9384
9385 // Reset join condition
9386 tab->select->cond= NULL;
9387 conds= NULL;
9388
9389 thd->set_status_no_index_used();
9390 // make_join_readinfo only calls inc_status_select_scan()
9391 // when this is not SELECT_DESCRIBE
9392 DBUG_ASSERT((select_options & SELECT_DESCRIBE) == 0);
9393 thd->inc_status_select_scan();
9394
9395 return;
9396 }
9397 else if (fts_item->ordered_result())
9398 order= NULL;
9399 }
9400
9401 /* Check whether the FTS result is covering. If only document id
9402 and rank is needed, there is no need to access table rows.
9403 */
9404 List_iterator<Item> it(all_fields);
9405 Item *item;
9406 // This optimization does not work with filesort nor GROUP BY
9407 bool covering= (!order && !group);
9408 bool docid_found= false;
9409 while (covering && (item= it++))
9410 {
9411 switch (item->type()) {
9412 case Item::FIELD_ITEM:
9413 {
9414 Item_field *item_field= static_cast<Item_field*>(item);
9415 if (strcmp(item_field->field_name, FTS_DOC_ID_COL_NAME) == 0)
9416 {
9417 docid_found= true;
9418 covering= fts_result->docid_in_result();
9419 }
9420 else
9421 covering= false;
9422 break;
9423 }
9424 case Item::FUNC_ITEM:
9425 if (static_cast<Item_func*>(item)->functype() == Item_func::FT_FUNC)
9426 {
9427 Item_func_match* fts_item= static_cast<Item_func_match*>(item);
9428 if (fts_item->eq(fts_result, true))
9429 break;
9430 }
9431 // Fall-through when not an equivalent MATCH expression
9432 default:
9433 covering= false;
9434 }
9435 }
9436
9437 if (covering)
9438 {
9439 if (docid_found)
9440 {
9441 replace_item_field(FTS_DOC_ID_COL_NAME,
9442 new Item_func_docid(reinterpret_cast<FT_INFO_EXT*>
9443 (fts_result->ft_handler)));
9444 }
9445
9446 // Tell storage engine that row access is not necessary
9447 fts_result->table->set_keyread(true);
9448 fts_result->table->covering_keys.set_bit(fts_result->key);
9449 }
9450 }
9451
9452
9453 /**
9454 Optimize FTS queries with ORDER BY/LIMIT, but no WHERE clause.
9455
9456 If MATCH expression is not in WHERE clause, but in ORDER BY,
9457 JT_FT access will not apply. However, if we are ordering on rank and
9458 there is a limit, normally, only the top ranking rows are needed
9459 returned, and one would benefit from the optimizations associated
9460 with JT_FT acess (@see optimize_fts_query). To get JT_FT access we
9461 will add the MATCH expression to the WHERE clause.
9462
9463 @note This optimization will only be applied to single table
9464 queries with no existing WHERE clause.
9465 @note This transformation is not correct if number of matches
9466 is less than the number of rows requested by limit.
9467 If this turns out to be the case, the transformation will
9468 be reverted @see optimize_fts_query()
9469 */
9470 void
optimize_fts_limit_query()9471 JOIN::optimize_fts_limit_query()
9472 {
9473 /*
9474 Only do this optimization if
9475 1. It is a single table query
9476 2. There is no WHERE condition
9477 3. There is a single ORDER BY element
9478 4. Ordering is descending
9479 5. There is a LIMIT clause
9480 6. Ordering is on a MATCH expression
9481 */
9482 if (primary_tables == 1 && // 1
9483 conds == NULL && // 2
9484 order && order->next == NULL && // 3
9485 order->direction == ORDER::ORDER_DESC && // 4
9486 m_select_limit != HA_POS_ERROR) // 5
9487 {
9488 DBUG_ASSERT(order->item);
9489 Item* item= *order->item;
9490 DBUG_ASSERT(item);
9491
9492 if (item->type() == Item::FUNC_ITEM &&
9493 static_cast<Item_func*>(item)->functype() == Item_func::FT_FUNC) // 6
9494 {
9495 conds= item;
9496 min_ft_matches= m_select_limit;
9497 }
9498 }
9499 }
9500
9501
9502 /**
9503 For {semijoin,subquery} materialization: calculates various cost
9504 information, based on a plan in join->best_positions covering the
9505 to-be-materialized query block and only this.
9506
9507 @param join JOIN where plan can be found
9508 @param sj_nest sj materialization nest (NULL if subquery materialization)
9509 @param n_tables number of to-be-materialized tables
9510 @param[out] sjm where computed costs will be stored
9511
9512 @note that this function modifies join->map2table, which has to be filled
9513 correctly later.
9514 */
calculate_materialization_costs(JOIN * join,TABLE_LIST * sj_nest,uint n_tables,Semijoin_mat_optimize * sjm)9515 static void calculate_materialization_costs(JOIN *join,
9516 TABLE_LIST *sj_nest,
9517 uint n_tables,
9518 Semijoin_mat_optimize *sjm)
9519 {
9520 double mat_cost; // Estimated cost of materialization
9521 double mat_rowcount; // Estimated row count before duplicate removal
9522 double distinct_rowcount; // Estimated rowcount after duplicate removal
9523 List<Item> *inner_expr_list;
9524
9525 if (sj_nest)
9526 {
9527 /*
9528 get_partial_join_cost() assumes a regular join, which is correct when
9529 we optimize a sj-materialization nest (always executed as regular
9530 join).
9531 @todo consider using join->best_rowcount instead.
9532 */
9533 get_partial_join_cost(join, n_tables,
9534 &mat_cost, &mat_rowcount);
9535 n_tables+= join->const_tables;
9536 inner_expr_list= &sj_nest->nested_join->sj_inner_exprs;
9537 }
9538 else
9539 {
9540 mat_cost= join->best_read;
9541 mat_rowcount= join->best_rowcount;
9542 inner_expr_list= &join->select_lex->item_list;
9543 }
9544
9545 /*
9546 Adjust output cardinality estimates. If the subquery has form
9547
9548 ... oe IN (SELECT t1.colX, t2.colY, func(X,Y,Z) )
9549
9550 then the number of distinct output record combinations has an
9551 upper bound of product of number of records matching the tables
9552 that are used by the SELECT clause.
9553 TODO:
9554 We can get a more precise estimate if we
9555 - use rec_per_key cardinality estimates. For simple cases like
9556 "oe IN (SELECT t.key ...)" it is trivial.
9557 - Functional dependencies between the tables in the semi-join
9558 nest (the payoff is probably less here?)
9559 */
9560 {
9561 for (uint i=0 ; i < n_tables ; i++)
9562 {
9563 JOIN_TAB * const tab= join->best_positions[i].table;
9564 join->map2table[tab->table->tablenr]= tab;
9565 }
9566 List_iterator<Item> it(*inner_expr_list);
9567 Item *item;
9568 table_map map= 0;
9569 while ((item= it++))
9570 map|= item->used_tables();
9571 map&= ~PSEUDO_TABLE_BITS;
9572 Table_map_iterator tm_it(map);
9573 int tableno;
9574 double rows= 1.0;
9575 while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
9576 rows*= join->map2table[tableno]->table->quick_condition_rows;
9577 distinct_rowcount= min(mat_rowcount, rows);
9578 }
9579 /*
9580 Calculate temporary table parameters and usage costs
9581 */
9582 const uint rowlen= get_tmp_table_rec_length(*inner_expr_list);
9583
9584 double row_cost; // The cost to write or lookup a row in temp. table
9585 double create_cost; // The cost to create a temporary table
9586 if (rowlen * distinct_rowcount <
9587 join->thd->variables.max_heap_table_size)
9588 {
9589 row_cost= HEAP_TEMPTABLE_ROW_COST;
9590 create_cost= HEAP_TEMPTABLE_CREATE_COST;
9591 }
9592 else
9593 {
9594 row_cost= DISK_TEMPTABLE_ROW_COST;
9595 create_cost= DISK_TEMPTABLE_CREATE_COST;
9596 }
9597
9598 /*
9599 Let materialization cost include the cost to create the temporary
9600 table and write the rows into it:
9601 */
9602 mat_cost+= create_cost + (mat_rowcount * row_cost);
9603 sjm->materialization_cost.reset();
9604 sjm->materialization_cost
9605 .add_io(mat_cost);
9606
9607 sjm->expected_rowcount= distinct_rowcount;
9608
9609 /*
9610 Set the cost to do a full scan of the temptable (will need this to
9611 consider doing sjm-scan):
9612 */
9613 sjm->scan_cost.reset();
9614 if (distinct_rowcount > 0.0)
9615 sjm->scan_cost.add_io(distinct_rowcount * row_cost);
9616
9617 sjm->lookup_cost.reset();
9618 sjm->lookup_cost.add_io(row_cost);
9619 }
9620
9621
9622 /**
9623 Decides between EXISTS and materialization; performs last steps to set up
9624 the chosen strategy.
9625 @returns 'false' if no error
9626
9627 @note If UNION this is called on each contained JOIN.
9628
9629 */
decide_subquery_strategy()9630 bool JOIN::decide_subquery_strategy()
9631 {
9632 DBUG_ASSERT(unit->item);
9633
9634 switch (unit->item->substype())
9635 {
9636 case Item_subselect::IN_SUBS:
9637 case Item_subselect::ALL_SUBS:
9638 case Item_subselect::ANY_SUBS:
9639 // All of those are children of Item_in_subselect and may use EXISTS
9640 break;
9641 default:
9642 return false;
9643 }
9644
9645 Item_in_subselect * const in_pred=
9646 static_cast<Item_in_subselect *>(unit->item);
9647
9648 Item_exists_subselect::enum_exec_method chosen_method= in_pred->exec_method;
9649 // Materialization does not allow UNION so this can't happen:
9650 DBUG_ASSERT(chosen_method != Item_exists_subselect::EXEC_MATERIALIZATION);
9651
9652 if ((chosen_method == Item_exists_subselect::EXEC_EXISTS_OR_MAT) &&
9653 compare_costs_of_subquery_strategies(&chosen_method))
9654 return true;
9655
9656 switch (chosen_method)
9657 {
9658 case Item_exists_subselect::EXEC_EXISTS:
9659 return in_pred->finalize_exists_transform(select_lex);
9660 case Item_exists_subselect::EXEC_MATERIALIZATION:
9661 return in_pred->finalize_materialization_transform(this);
9662 default:
9663 DBUG_ASSERT(false);
9664 return true;
9665 }
9666 }
9667
9668
9669 /**
9670 Tells what is the cheapest between IN->EXISTS and subquery materialization,
9671 in terms of cost, for the subquery's JOIN.
9672 Input:
9673 - join->{best_positions,best_read,best_rowcount} must contain the
9674 execution plan of EXISTS (where 'join' is the subquery's JOIN)
9675 - join2->{best_positions,best_read,best_rowcount} must be correctly set
9676 (where 'join2' is the parent join, the grandparent join, etc).
9677 Output:
9678 join->{best_positions,best_read,best_rowcount} contain the cheapest
9679 execution plan (where 'join' is the subquery's JOIN).
9680
9681 This plan choice has to happen before calling functions which set up
9682 execution structures, like JOIN::get_best_combination() or
9683 JOIN::set_access_methods().
9684
9685 @param[out] method chosen method (EXISTS or materialization) will be put
9686 here.
9687 @returns false if success
9688 */
compare_costs_of_subquery_strategies(Item_exists_subselect::enum_exec_method * method)9689 bool JOIN::compare_costs_of_subquery_strategies(
9690 Item_exists_subselect::enum_exec_method *method)
9691 {
9692 *method= Item_exists_subselect::EXEC_EXISTS;
9693
9694 if (!thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MATERIALIZATION))
9695 return false;
9696
9697 const JOIN *parent_join= unit->outer_select()->join;
9698 if (!parent_join || !parent_join->child_subquery_can_materialize)
9699 return false;
9700
9701 Item_in_subselect * const in_pred=
9702 static_cast<Item_in_subselect *>(unit->item);
9703
9704 /*
9705 Testing subquery_allows_etc() at each optimization is necessary as each
9706 execution of a prepared statement may use a different type of parameter.
9707 */
9708 if (!subquery_allows_materialization(in_pred, thd, select_lex,
9709 select_lex->outer_select()))
9710 return false;
9711
9712 Opt_trace_context * const trace= &thd->opt_trace;
9713 Opt_trace_object trace_wrapper(trace);
9714 Opt_trace_object
9715 trace_subqmat(trace, "execution_plan_for_potential_materialization");
9716 const double saved_best_read= best_read;
9717 const ha_rows saved_best_rowcount= best_rowcount;
9718 POSITION * const saved_best_pos= best_positions;
9719
9720 if (in_pred->in2exists_added_to_where())
9721 {
9722 Opt_trace_array trace_subqmat_steps(trace, "steps");
9723
9724 // Up to one extra slot per semi-join nest is needed (if materialized)
9725 const uint sj_nests= select_lex->sj_nests.elements;
9726
9727 if (!(best_positions= new (thd->mem_root) POSITION[tables + sj_nests + 1]))
9728 return true;
9729
9730 // Compute plans which do not use outer references
9731
9732 DBUG_ASSERT(allow_outer_refs);
9733 allow_outer_refs= false;
9734
9735 if (optimize_semijoin_nests_for_materialization(this))
9736 return true;
9737
9738 if (Optimize_table_order(thd, this, NULL).choose_table_order())
9739 return true;
9740 }
9741 else
9742 {
9743 /*
9744 If IN->EXISTS didn't add any condition to WHERE (only to HAVING, which
9745 can happen if subquery has aggregates) then the plan for materialization
9746 will be the same as for EXISTS - don't compute it again.
9747 */
9748 trace_subqmat.add("surely_same_plan_as_EXISTS", true).
9749 add_alnum("cause", "EXISTS_did_not_change_WHERE");
9750 }
9751
9752 Semijoin_mat_optimize sjm;
9753 calculate_materialization_costs(this, NULL, primary_tables, &sjm);
9754
9755 /*
9756 The number of evaluations of the subquery influences costs, we need to
9757 compute it.
9758 */
9759 Opt_trace_object trace_subq_mat_decision(trace, "subq_mat_decision");
9760 Opt_trace_array trace_parents(trace, "parent_fanouts");
9761 const Item_subselect *subs= in_pred;
9762 double subq_executions= 1.0;
9763 for(;;)
9764 {
9765 Opt_trace_object trace_parent(trace);
9766 trace_parent.add_select_number(parent_join->select_lex->select_number);
9767 double parent_fanout;
9768 if (// safety, not sure needed
9769 parent_join->plan_is_const() ||
9770 // if subq is in condition on constant table:
9771 !parent_join->child_subquery_can_materialize)
9772 {
9773 parent_fanout= 1.0;
9774 trace_parent.add("subq_attached_to_const_table", true);
9775 }
9776 else
9777 {
9778 if (subs->in_cond_of_tab != INT_MIN)
9779 {
9780 /*
9781 Subquery is attached to a certain 'pos', pos[-1].prefix_record_count
9782 is the number of times we'll start a loop accessing 'pos'; each such
9783 loop will read pos->records_read records of 'pos', so subquery will
9784 be evaluated pos[-1].prefix_record_count * pos->records_read times.
9785 Exceptions:
9786 - if 'pos' is first, use 1 instead of pos[-1].prefix_record_count
9787 - if 'pos' is first of a sjerialization-mat nest, same.
9788
9789 If in a sj-materialization nest, pos->records_read and
9790 pos[-1].prefix_record_count are of the "nest materialization" plan
9791 (copied back in fix_semijoin_strategies()), which is
9792 appropriate as it corresponds to evaluations of our subquery.
9793 */
9794 const uint idx= subs->in_cond_of_tab;
9795 DBUG_ASSERT((int)idx >= 0 && idx < parent_join->tables);
9796 trace_parent.add("subq_attached_to_table", true);
9797 trace_parent.add_utf8_table(parent_join->join_tab[idx].table);
9798 parent_fanout= parent_join->join_tab[idx].position->records_read;
9799 if ((idx > parent_join->const_tables) &&
9800 !sj_is_materialize_strategy(parent_join
9801 ->join_tab[idx].position->sj_strategy))
9802 parent_fanout*=
9803 parent_join->join_tab[idx - 1].position->prefix_record_count;
9804 }
9805 else
9806 {
9807 /*
9808 Subquery is SELECT list, GROUP BY, ORDER BY, HAVING: it is evaluated
9809 at the end of the parent join's execution.
9810 It can be evaluated once per row-before-grouping:
9811 SELECT SUM(t1.col IN (subq)) FROM t1 GROUP BY expr;
9812 or once per row-after-grouping:
9813 SELECT SUM(t1.col) AS s FROM t1 GROUP BY expr HAVING s IN (subq),
9814 SELECT SUM(t1.col) IN (subq) FROM t1 GROUP BY expr
9815 It's hard to tell. We simply assume 'once per
9816 row-before-grouping'.
9817
9818 Another approximation:
9819 SELECT ... HAVING x IN (subq) LIMIT 1
9820 best_rowcount=1 due to LIMIT, though HAVING (and thus the subquery)
9821 may be evaluated many times before HAVING becomes true and the limit
9822 is reached.
9823 */
9824 trace_parent.add("subq_attached_to_join_result", true);
9825 parent_fanout= parent_join->best_rowcount;
9826 }
9827 }
9828 subq_executions*= parent_fanout;
9829 trace_parent.add("fanout", parent_fanout);
9830 const bool cacheable= parent_join->select_lex->is_cacheable();
9831 trace_parent.add("cacheable", cacheable);
9832 if (cacheable)
9833 {
9834 // Parent executed only once
9835 break;
9836 }
9837 /*
9838 Parent query is executed once per outer row => go up to find number of
9839 outer rows. Example:
9840 SELECT ... IN(subq-with-in2exists WHERE ... IN (subq-with-mat))
9841 */
9842 if (!(subs= parent_join->unit->item))
9843 {
9844 // derived table, materialized only once
9845 break;
9846 }
9847 parent_join= parent_join->unit->outer_select()->join;
9848 if (!parent_join)
9849 {
9850 /*
9851 May be single-table UPDATE/DELETE, has no join.
9852 @todo we should find how many rows it plans to UPDATE/DELETE, taking
9853 inspiration in Explain_table::explain_rows_and_filtered().
9854 This is not a priority as it applies only to
9855 UPDATE - child(non-mat-subq) - grandchild(may-be-mat-subq).
9856 And it will autosolve the day UPDATE gets a JOIN.
9857 */
9858 break;
9859 }
9860 } // for(;;)
9861 trace_parents.end();
9862
9863 const double cost_exists= subq_executions * saved_best_read;
9864 const double cost_mat_table= sjm.materialization_cost.total_cost();
9865 const double cost_mat= cost_mat_table + subq_executions *
9866 sjm.lookup_cost.total_cost();
9867 const bool mat_chosen=
9868 thd->optimizer_switch_flag(OPTIMIZER_SWITCH_SUBQ_MAT_COST_BASED) ?
9869 (cost_mat < cost_exists) : true;
9870 trace_subq_mat_decision
9871 .add("cost_to_create_and_fill_materialized_table",
9872 cost_mat_table)
9873 .add("cost_of_one_EXISTS", saved_best_read)
9874 .add("number_of_subquery_evaluations", subq_executions)
9875 .add("cost_of_materialization", cost_mat)
9876 .add("cost_of_EXISTS", cost_exists)
9877 .add("chosen", mat_chosen);
9878 if (mat_chosen)
9879 *method= Item_exists_subselect::EXEC_MATERIALIZATION;
9880 else
9881 {
9882 best_read= saved_best_read;
9883 best_rowcount= saved_best_rowcount;
9884 best_positions= saved_best_pos;
9885 /*
9886 Don't restore JOIN::positions or best_ref, they're not used
9887 afterwards. best_positions is (like: by get_sj_strategy()).
9888 */
9889 }
9890 return false;
9891 }
9892
9893
9894 /**
9895 Refine the best_rowcount estimation based on what happens after tables
9896 have been joined: LIMIT and type of result sink.
9897 */
refine_best_rowcount()9898 void JOIN::refine_best_rowcount()
9899 {
9900 // If plan is const, 0 or 1 rows should be returned
9901 DBUG_ASSERT(!plan_is_const() || best_rowcount <= 1);
9902
9903 if (plan_is_const())
9904 return;
9905
9906 /*
9907 If a derived table, or a member of a UNION which itself forms a derived
9908 table:
9909 setting estimate to 0 or 1 row would mark the derived table as const.
9910 The row count is bumped to the nearest higher value, so that the
9911 query block will not be evaluated during optimization.
9912 */
9913 if (best_rowcount <= 1 &&
9914 select_lex->master_unit()->first_select()->linkage ==
9915 DERIVED_TABLE_TYPE)
9916 best_rowcount= 2;
9917
9918 /*
9919 There will be no more rows than defined in the LIMIT clause. Use it
9920 as an estimate. If LIMIT 1 is specified, the query block will be
9921 considered "const", with actual row count 0 or 1.
9922 */
9923 set_if_smaller(best_rowcount, unit->select_limit_cnt);
9924 }
9925
9926 /**
9927 @} (end of group Query_Optimizer)
9928 */
9929