1 /* Copyright (c) 2000, 2021, Oracle and/or its affiliates.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
22 
23 /**
24   @file
25 
26   @brief Optimize query expressions: Make optimal table join order, select
27          optimal access methods per table, apply grouping, sorting and
28          limit processing.
29 
30   @defgroup Query_Optimizer  Query Optimizer
31   @{
32 */
33 
34 #include "sql_optimizer.h"
35 
36 #include "my_bit.h"              // my_count_bits
37 #include "abstract_query_plan.h" // Join_plan
38 #include "debug_sync.h"          // DEBUG_SYNC
39 #include "item_sum.h"            // Item_sum
40 #include "lock.h"                // mysql_unlock_some_tables
41 #include "opt_explain.h"         // join_type_str
42 #include "opt_trace.h"           // Opt_trace_object
43 #include "sql_base.h"            // init_ftfuncs
44 #include "sql_join_buffer.h"     // JOIN_CACHE
45 #include "sql_parse.h"           // check_stack_overrun
46 #include "sql_planner.h"         // calculate_condition_filter
47 #include "sql_resolver.h"        // subquery_allows_materialization
48 #include "sql_test.h"            // print_where
49 #include "sql_tmp_table.h"       // get_max_key_and_part_length
50 #include "opt_hints.h"           // hint_table_state
51 
52 #include <algorithm>
53 using std::max;
54 using std::min;
55 
56 static bool optimize_semijoin_nests_for_materialization(JOIN *join);
57 static void calculate_materialization_costs(JOIN *join, TABLE_LIST *sj_nest,
58                                             uint n_tables,
59                                             Semijoin_mat_optimize *sjm);
60 static bool make_join_select(JOIN *join, Item *item);
61 static bool list_contains_unique_index(JOIN_TAB *tab,
62                           bool (*find_func) (Field *, void *), void *data);
63 static bool find_field_in_item_list (Field *field, void *data);
64 static bool find_field_in_order_list (Field *field, void *data);
65 static ORDER *create_distinct_group(THD *thd, Ref_ptr_array ref_pointer_array,
66                                     ORDER *order, List<Item> &fields,
67                                     List<Item> &all_fields,
68 				    bool *all_order_by_fields_used);
69 static TABLE *get_sort_by_table(ORDER *a,ORDER *b,TABLE_LIST *tables);
70 static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab);
71 static Item *remove_additional_cond(Item* conds);
72 static void trace_table_dependencies(Opt_trace_context * trace,
73                                      JOIN_TAB *join_tabs,
74                                      uint table_count);
75 static bool
76 update_ref_and_keys(THD *thd, Key_use_array *keyuse,JOIN_TAB *join_tab,
77                     uint tables, Item *cond, COND_EQUAL *cond_equal,
78                     table_map normal_tables, SELECT_LEX *select_lex,
79                     SARGABLE_PARAM **sargables);
80 static bool pull_out_semijoin_tables(JOIN *join);
81 static void add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab);
82 static ha_rows get_quick_record_count(THD *thd, JOIN_TAB *tab, ha_rows limit);
83 static Item *
84 make_cond_for_table_from_pred(Item *root_cond, Item *cond,
85                               table_map tables, table_map used_table,
86                               bool exclude_expensive_cond);
87 static bool
88 only_eq_ref_tables(JOIN *join, ORDER *order, table_map tables,
89                    table_map *cached_eq_ref_tables, table_map
90                    *eq_ref_tables);
91 static bool setup_join_buffering(JOIN_TAB *tab, JOIN *join, uint no_jbuf_after);
92 
93 static bool
94 test_if_skip_sort_order(JOIN_TAB *tab, ORDER *order, ha_rows select_limit,
95                         const bool no_changes, const key_map *map,
96                         const char *clause_type);
97 
98 static Item_func_match *test_if_ft_index_order(ORDER *order);
99 
100 
101 static uint32 get_key_length_tmp_table(Item *item);
102 
103 /**
104   Optimizes one query block into a query execution plan (QEP.)
105 
106   This is the entry point to the query optimization phase. This phase
107   applies both logical (equivalent) query rewrites, cost-based join
108   optimization, and rule-based access path selection. Once an optimal
109   plan is found, the member function creates/initializes all
110   structures needed for query execution. The main optimization phases
111   are outlined below:
112 
113     -# Logical transformations:
114       - Outer to inner joins transformation.
115       - Equality/constant propagation.
116       - Partition pruning.
117       - COUNT(*), MIN(), MAX() constant substitution in case of
118         implicit grouping.
119       - ORDER BY optimization.
120     -# Perform cost-based optimization of table order and access path
121        selection. See JOIN::make_join_plan()
122     -# Post-join order optimization:
123        - Create optimal table conditions from the where clause and the
124          join conditions.
125        - Inject outer-join guarding conditions.
126        - Adjust data access methods after determining table condition
127          (several times.)
128        - Optimize ORDER BY/DISTINCT.
129     -# Code generation
130        - Set data access functions.
131        - Try to optimize away sorting/distinct.
132        - Setup temporary table usage for grouping and/or sorting.
133 
134   @retval 0 Success.
135   @retval 1 Error, error code saved in member JOIN::error.
136 */
137 int
optimize()138 JOIN::optimize()
139 {
140   uint no_jbuf_after= UINT_MAX;
141 
142   DBUG_ENTER("JOIN::optimize");
143   assert(select_lex->leaf_table_count == 0 ||
144          thd->lex->is_query_tables_locked() ||
145          select_lex == unit->fake_select_lex);
146   assert(tables == 0 &&
147          primary_tables == 0 &&
148          tables_list == (TABLE_LIST*)1);
149 
150   // to prevent double initialization on EXPLAIN
151   if (optimized)
152     DBUG_RETURN(0);
153 
154   Prepare_error_tracker tracker(thd);
155 
156   DEBUG_SYNC(thd, "before_join_optimize");
157 
158   THD_STAGE_INFO(thd, stage_optimizing);
159 
160   if (select_lex->first_execution)
161   {
162     /**
163       @todo
164       This query block didn't transform itself in SELECT_LEX::prepare(), so
165       belongs to a parent query block. That parent, or its parents, had to
166       transform us - it has not; maybe it is itself in prepare() and
167       evaluating the present query block as an Item_subselect. Such evaluation
168       in prepare() is expected to be a rare case to be eliminated in the
169       future ("SET x=(subq)" is one such case; because it locks tables before
170       prepare()).
171     */
172     if (select_lex->apply_local_transforms(thd, false))
173       DBUG_RETURN(error= 1);
174   }
175 
176   Opt_trace_context * const trace= &thd->opt_trace;
177   Opt_trace_object trace_wrapper(trace);
178   Opt_trace_object trace_optimize(trace, "join_optimization");
179   trace_optimize.add_select_number(select_lex->select_number);
180   Opt_trace_array trace_steps(trace, "steps");
181 
182   count_field_types(select_lex, &tmp_table_param, all_fields, false, false);
183 
184   assert(tmp_table_param.sum_func_count == 0 ||
185          group_list || implicit_grouping);
186 
187   if (select_lex->olap == ROLLUP_TYPE && optimize_rollup())
188     DBUG_RETURN(true); /* purecov: inspected */
189 
190   if (alloc_func_list())
191     DBUG_RETURN(1);    /* purecov: inspected */
192 
193   if (select_lex->get_optimizable_conditions(thd, &where_cond, &having_cond))
194     DBUG_RETURN(1);
195 
196   set_optimized();
197 
198   tables_list= select_lex->get_table_list();
199 
200   /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
201   /*
202     Run optimize phase for all derived tables/views used in this SELECT,
203     including those in semi-joins.
204   */
205   if (select_lex->materialized_derived_table_count)
206   {
207     for (TABLE_LIST *tl= select_lex->leaf_tables; tl; tl= tl->next_leaf)
208     {
209       if (tl->is_view_or_derived() && tl->optimize_derived(thd))
210         DBUG_RETURN(1);
211     }
212   }
213 
214   /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
215 
216   row_limit= ((select_distinct || order || group_list) ?
217              HA_POS_ERROR : unit->select_limit_cnt);
218   // m_select_limit is used to decide if we are likely to scan the whole table.
219   m_select_limit= unit->select_limit_cnt;
220 
221   if (unit->first_select()->active_options() & OPTION_FOUND_ROWS)
222   {
223     /*
224       Calculate found rows if
225       - LIMIT is set, and
226       - Query block is not equipped with "braces". In this case, each
227         query block must be calculated fully and the limit is applied on
228         the final UNION evaluation.
229     */
230     calc_found_rows= m_select_limit != HA_POS_ERROR && !select_lex->braces;
231   }
232   if (having_cond || calc_found_rows)
233     m_select_limit= HA_POS_ERROR;
234 
235   if (unit->select_limit_cnt == 0 && !calc_found_rows)
236   {
237     zero_result_cause= "Zero limit";
238     best_rowcount= 0;
239     goto setup_subq_exit;
240   }
241 
242   if (where_cond || select_lex->outer_join)
243   {
244     if (optimize_cond(thd, &where_cond, &cond_equal,
245                       &select_lex->top_join_list, &select_lex->cond_value))
246     {
247       error= 1;
248       DBUG_PRINT("error",("Error from optimize_cond"));
249       DBUG_RETURN(1);
250     }
251     if (select_lex->cond_value == Item::COND_FALSE)
252     {
253       zero_result_cause= "Impossible WHERE";
254       best_rowcount= 0;
255       goto setup_subq_exit;
256     }
257   }
258   if (having_cond)
259   {
260     if (optimize_cond(thd, &having_cond, &cond_equal, NULL,
261                       &select_lex->having_value))
262     {
263       error= 1;
264       DBUG_PRINT("error",("Error from optimize_cond"));
265       DBUG_RETURN(1);
266     }
267     if (select_lex->having_value == Item::COND_FALSE)
268     {
269       zero_result_cause= "Impossible HAVING";
270       best_rowcount= 0;
271       goto setup_subq_exit;
272     }
273   }
274 
275   if (select_lex->partitioned_table_count && prune_table_partitions())
276   {
277     error= 1;
278     DBUG_PRINT("error", ("Error from prune_partitions"));
279     DBUG_RETURN(1);
280   }
281 
282   /*
283      Try to optimize count(*), min() and max() to const fields if
284      there is implicit grouping (aggregate functions but no
285      group_list). In this case, the result set shall only contain one
286      row.
287   */
288   if (tables_list && implicit_grouping)
289   {
290     int res;
291     /*
292       opt_sum_query() returns HA_ERR_KEY_NOT_FOUND if no rows match
293       the WHERE condition,
294       or 1 if all items were resolved (optimized away),
295       or 0, or an error number HA_ERR_...
296 
297       If all items were resolved by opt_sum_query, there is no need to
298       open any tables.
299     */
300     if ((res= opt_sum_query(thd, select_lex->leaf_tables, all_fields,
301                             where_cond)))
302     {
303       best_rowcount= 0;
304       if (res == HA_ERR_KEY_NOT_FOUND)
305       {
306         DBUG_PRINT("info",("No matching min/max row"));
307 	zero_result_cause= "No matching min/max row";
308         goto setup_subq_exit;
309       }
310       if (res > 1)
311       {
312         error= res;
313         DBUG_PRINT("error",("Error from opt_sum_query"));
314         DBUG_RETURN(1);
315       }
316       if (res < 0)
317       {
318         DBUG_PRINT("info",("No matching min/max row"));
319         zero_result_cause= "No matching min/max row";
320         goto setup_subq_exit;
321       }
322       DBUG_PRINT("info",("Select tables optimized away"));
323       zero_result_cause= "Select tables optimized away";
324       tables_list= 0;				// All tables resolved
325       best_rowcount= 1;
326       const_tables= tables= primary_tables= select_lex->leaf_table_count;
327       /*
328         Extract all table-independent conditions and replace the WHERE
329         clause with them. All other conditions were computed by opt_sum_query
330         and the MIN/MAX/COUNT function(s) have been replaced by constants,
331         so there is no need to compute the whole WHERE clause again.
332         Notice that make_cond_for_table() will always succeed to remove all
333         computed conditions, because opt_sum_query() is applicable only to
334         conjunctions.
335         Preserve conditions for EXPLAIN.
336       */
337       if (where_cond && !thd->lex->describe)
338       {
339         Item *table_independent_conds=
340           make_cond_for_table(where_cond, PSEUDO_TABLE_BITS, 0, 0);
341         DBUG_EXECUTE("where",
342                      print_where(table_independent_conds,
343                                  "where after opt_sum_query()",
344                                  QT_ORDINARY););
345         where_cond= table_independent_conds;
346       }
347       goto setup_subq_exit;
348     }
349   }
350   if (!tables_list)
351   {
352     DBUG_PRINT("info",("No tables"));
353     best_rowcount= 1;
354     error= 0;
355     if (make_tmp_tables_info())
356       DBUG_RETURN(1);
357     count_field_types(select_lex, &tmp_table_param, all_fields, false, false);
358     // Make plan visible for EXPLAIN
359     set_plan_state(NO_TABLES);
360     DBUG_RETURN(0);
361   }
362   error= -1;					// Error is sent to client
363   sort_by_table= get_sort_by_table(order, group_list, select_lex->leaf_tables);
364 
365   if ((where_cond || group_list || order) &&
366       substitute_gc(thd, select_lex, where_cond, group_list, order))
367   {
368     // We added hidden fields to the all_fields list, count them.
369     count_field_types(select_lex, &tmp_table_param, select_lex->all_fields,
370                       false, false);
371   }
372 
373   // Set up join order and initial access paths
374   THD_STAGE_INFO(thd, stage_statistics);
375   if (make_join_plan())
376   {
377     if (thd->killed)
378       thd->send_kill_message();
379     DBUG_PRINT("error",("Error: JOIN::make_join_plan() failed"));
380     DBUG_RETURN(1);
381   }
382 
383   // At this stage, join_tab==NULL, JOIN_TABs are listed in order by best_ref.
384   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
385 
386   if (zero_result_cause)
387     goto setup_subq_exit;
388 
389   if (rollup.state != ROLLUP::STATE_NONE)
390   {
391     if (rollup_process_const_fields())
392     {
393       DBUG_PRINT("error", ("Error: rollup_process_fields() failed"));
394       DBUG_RETURN(1);
395     }
396     /*
397       Fields may have been replaced by Item_func_rollup_const, so
398       recalculate the number of fields and functions for this query block.
399     */
400 
401     // JOIN::optimize_rollup() may set quick_group=0, and we must not undo that.
402     const uint save_quick_group= tmp_table_param.quick_group;
403 
404     count_field_types(select_lex, &tmp_table_param, all_fields, false, false);
405     tmp_table_param.quick_group= save_quick_group;
406   }
407   else
408   {
409     /* Remove distinct if only const tables */
410     select_distinct&= !plan_is_const();
411   }
412 
413   if (const_tables && !thd->locked_tables_mode &&
414       !(select_lex->active_options() & SELECT_NO_UNLOCK))
415   {
416     TABLE *ct[MAX_TABLES];
417     for (uint i= 0; i < const_tables; i++)
418       ct[i]= best_ref[i]->table();
419     mysql_unlock_some_tables(thd, ct, const_tables);
420   }
421   if (!where_cond && select_lex->outer_join)
422   {
423     /* Handle the case where we have an OUTER JOIN without a WHERE */
424     where_cond=new Item_int((longlong) 1,1);	// Always true
425   }
426 
427   error= 0;
428   /*
429     Among the equal fields belonging to the same multiple equality
430     choose the one that is to be retrieved first and substitute
431     all references to these in where condition for a reference for
432     the selected field.
433   */
434   if (where_cond)
435   {
436     where_cond= substitute_for_best_equal_field(where_cond, cond_equal,
437                                                 map2table);
438     if (thd->is_error())
439     {
440       error= 1;
441       DBUG_PRINT("error",("Error from substitute_for_best_equal"));
442       DBUG_RETURN(1);
443     }
444     where_cond->update_used_tables();
445     DBUG_EXECUTE("where",
446                  print_where(where_cond,
447                              "after substitute_best_equal",
448                              QT_ORDINARY););
449   }
450 
451   /*
452     Perform the same optimization on field evaluation for all join conditions.
453   */
454   for (uint i= const_tables; i < tables ; ++i)
455   {
456     JOIN_TAB *const tab= best_ref[i];
457     if (tab->position() && tab->join_cond())
458     {
459       tab->set_join_cond(substitute_for_best_equal_field(tab->join_cond(),
460                                                          tab->cond_equal,
461                                                          map2table));
462       if (thd->is_error())
463       {
464         error= 1;
465         DBUG_PRINT("error",("Error from substitute_for_best_equal"));
466         DBUG_RETURN(1);
467       }
468       tab->join_cond()->update_used_tables();
469     }
470   }
471 
472   if (init_ref_access())
473   {
474     error= 1;
475     DBUG_PRINT("error",("Error from init_ref_access"));
476     DBUG_RETURN(1);
477   }
478 
479   // Update table dependencies after assigning ref access fields
480   update_depend_map();
481 
482   THD_STAGE_INFO(thd, stage_preparing);
483 
484   if (make_join_select(this, where_cond))
485   {
486     if (thd->is_error())
487       DBUG_RETURN(1);
488 
489     zero_result_cause=
490       "Impossible WHERE noticed after reading const tables";
491     goto setup_subq_exit;
492   }
493 
494   if (select_lex->query_result()->initialize_tables(this))
495   {
496     DBUG_PRINT("error",("Error: initialize_tables() failed"));
497     DBUG_RETURN(1);				// error == -1
498   }
499 
500   error= -1;					/* if goto err */
501 
502   if (optimize_distinct_group_order())
503     DBUG_RETURN(true);
504 
505   if ((select_lex->active_options() & SELECT_NO_JOIN_CACHE) ||
506       select_lex->ftfunc_list->elements)
507     no_jbuf_after= 0;
508 
509   /* Perform FULLTEXT search before all regular searches */
510   if (select_lex->has_ft_funcs() && optimize_fts_query())
511     DBUG_RETURN(1);
512 
513   /*
514     By setting child_subquery_can_materialize so late we gain the following:
515     JOIN::compare_costs_of_subquery_strategies() can test this variable to
516     know if we are have finished evaluating constant conditions, which itself
517     helps determining fanouts.
518   */
519   child_subquery_can_materialize= true;
520 
521   /*
522     It's necessary to check const part of HAVING cond as
523     there is a chance that some cond parts may become
524     const items after make_join_statisctics(for example
525     when Item is a reference to const table field from
526     outer join).
527     This check is performed only for those conditions
528     which do not use aggregate functions. In such case
529     temporary table may not be used and const condition
530     elements may be lost during further having
531     condition transformation in JOIN::exec.
532   */
533   if (having_cond && const_table_map && !having_cond->with_sum_func)
534   {
535     having_cond->update_used_tables();
536     if (remove_eq_conds(thd, having_cond, &having_cond,
537                         &select_lex->having_value))
538     {
539       error= 1;
540       DBUG_PRINT("error",("Error from remove_eq_conds"));
541       DBUG_RETURN(1);
542     }
543     if (select_lex->having_value == Item::COND_FALSE)
544     {
545       having_cond= new Item_int((longlong) 0,1);
546       zero_result_cause= "Impossible HAVING noticed after reading const tables";
547       goto setup_subq_exit;
548     }
549   }
550 
551   /* Cache constant expressions in WHERE, HAVING, ON clauses. */
552   if (!plan_is_const() && cache_const_exprs())
553     DBUG_RETURN(1);
554 
555   // See if this subquery can be evaluated with subselect_indexsubquery_engine
556   if (const int ret= replace_index_subquery())
557   {
558     set_plan_state(PLAN_READY);
559     /*
560       We leave optimize() because the rest of it is only about order/group
561       which those subqueries don't have and about setting up plan which
562       we're not going to use due to different execution method.
563     */
564     DBUG_RETURN(ret < 0);
565   }
566 
567   {
568     /*
569       If the hint FORCE INDEX FOR ORDER BY/GROUP BY is used for the first
570       table (it does not make sense for other tables) then we cannot do join
571       buffering.
572     */
573     if (!plan_is_const())
574     {
575       const TABLE * const first= best_ref[const_tables]->table();
576       if ((first->force_index_order && order) ||
577           (first->force_index_group && group_list))
578         no_jbuf_after= 0;
579     }
580 
581     bool simple_sort= true;
582     // Check whether join cache could be used
583     for (uint i= const_tables; i < tables; i++)
584     {
585       JOIN_TAB *const tab= best_ref[i];
586       if (!tab->position())
587         continue;
588       if (setup_join_buffering(tab, this, no_jbuf_after))
589         DBUG_RETURN(true);
590       if (tab->use_join_cache() != JOIN_CACHE::ALG_NONE)
591         simple_sort= false;
592       assert(tab->type() != JT_FT ||
593              tab->use_join_cache() == JOIN_CACHE::ALG_NONE);
594     }
595     if (!simple_sort)
596     {
597       /*
598         A join buffer is used for this table. We here inform the optimizer
599         that it should not rely on rows of the first non-const table being in
600         order thanks to an index scan; indeed join buffering of the present
601         table subsequently changes the order of rows.
602       */
603       simple_order= simple_group= false;
604     }
605   }
606 
607   if (!plan_is_const() && order)
608   {
609     /*
610       Force using of tmp table if sorting by a SP or UDF function due to
611       their expensive and probably non-deterministic nature.
612     */
613     for (ORDER *tmp_order= order; tmp_order ; tmp_order=tmp_order->next)
614     {
615       Item *item= *tmp_order->item;
616       if (item->is_expensive())
617       {
618         /* Force tmp table without sort */
619         simple_order= simple_group= false;
620         break;
621       }
622     }
623   }
624 
625   /*
626     Check if we need to create a temporary table.
627     This has to be done if all tables are not already read (const tables)
628     and one of the following conditions holds:
629     - We are using DISTINCT (simple distinct's have already been optimized away)
630     - We are using an ORDER BY or GROUP BY on fields not in the first table
631     - We are using different ORDER BY and GROUP BY orders
632     - The user wants us to buffer the result.
633     When the WITH ROLLUP modifier is present, we cannot skip temporary table
634     creation for the DISTINCT clause just because there are only const tables.
635   */
636   need_tmp= ((!plan_is_const() &&
637 	     ((select_distinct || (order && !simple_order) ||
638                (group_list && !simple_group)) ||
639 	      (group_list && order) ||
640               (select_lex->active_options() & OPTION_BUFFER_RESULT))) ||
641              (rollup.state != ROLLUP::STATE_NONE && select_distinct));
642 
643   DBUG_EXECUTE("info", TEST_join(this););
644 
645   if (!plan_is_const())
646   {
647     JOIN_TAB *tab= best_ref[const_tables];
648     /*
649       Because filesort always does a full table scan or a quick range scan
650       we must add the removed reference to the select for the table.
651       We only need to do this when we have a simple_order or simple_group
652       as in other cases the join is done before the sort.
653     */
654     if ((order || group_list) &&
655         tab->type() != JT_ALL &&
656         tab->type() != JT_FT &&
657         tab->type() != JT_REF_OR_NULL &&
658         ((order && simple_order) || (group_list && simple_group)))
659     {
660       if (add_ref_to_table_cond(thd,tab)) {
661         DBUG_RETURN(1);
662       }
663     }
664     // Test if we can use an index instead of sorting
665     test_skip_sort();
666   }
667 
668   if (alloc_qep(tables))
669     DBUG_RETURN(error= 1);                      /* purecov: inspected */
670 
671   if (make_join_readinfo(this, no_jbuf_after))
672     DBUG_RETURN(1);                             /* purecov: inspected */
673 
674   if (make_tmp_tables_info())
675     DBUG_RETURN(1);
676 
677   // At this stage, we have fully set QEP_TABs; JOIN_TABs are unaccessible,
678   // pushed joins(see below) are still allowed to change the QEP_TABs
679 
680   /*
681     Push joins to handlerton(s)
682 
683     The handlerton(s) will inspect the QEP through the
684     AQP (Abstract Query Plan) and extract from it whatever
685     it might implement of pushed execution.
686 
687     It is the responsibility of the handler:
688      - to store any information it need for later
689        execution of pushed queries.
690      - to call appropriate AQP functions which modifies the
691        QEP to use the special 'linked' read functions
692        for those parts of the join which have been pushed.
693 
694     Currently pushed joins are only implemented by NDB.
695 
696     It only make sense to try pushing if > 1 non-const tables.
697   */
698   if (!plan_is_single_table() && !plan_is_const())
699   {
700     const AQP::Join_plan plan(this);
701     if (ha_make_pushed_joins(thd, &plan))
702       DBUG_RETURN(1);
703   }
704 
705   // Update m_current_query_cost to reflect actual need of filesort.
706   if (sort_cost > 0.0 && !explain_flags.any(ESP_USING_FILESORT))
707   {
708     best_read-= sort_cost;
709     sort_cost= 0.0;
710     if (thd->lex->is_single_level_stmt())
711       thd->m_current_query_cost= best_read;
712   }
713 
714   count_field_types(select_lex, &tmp_table_param, all_fields, false, false);
715   // Make plan visible for EXPLAIN
716   set_plan_state(PLAN_READY);
717 
718   DEBUG_SYNC(thd, "after_join_optimize");
719 
720   error= 0;
721   DBUG_RETURN(0);
722 
723 setup_subq_exit:
724 
725   assert(zero_result_cause != NULL);
726   /*
727     Even with zero matching rows, subqueries in the HAVING clause may
728     need to be evaluated if there are aggregate functions in the
729     query. If this JOIN is part of an outer query, subqueries in HAVING may
730     be evaluated several times in total; so subquery materialization makes
731     sense.
732   */
733   child_subquery_can_materialize= true;
734   trace_steps.end();   // because all steps are done
735   Opt_trace_object(trace, "empty_result")
736     .add_alnum("cause", zero_result_cause);
737 
738   having_for_explain= having_cond;
739   error= 0;
740 
741   if (!qep_tab && best_ref)
742   {
743     /*
744       After creation of JOIN_TABs in make_join_plan(), we have shortcut due to
745       some zero_result_cause. For simplification, if we have JOIN_TABs we
746       want QEP_TABs too.
747     */
748     if (alloc_qep(tables))
749       DBUG_RETURN(1);                           /* purecov: inspected */
750     unplug_join_tabs();
751   }
752 
753   set_plan_state(ZERO_RESULT);
754   DBUG_RETURN(0);
755 }
756 
757 
758 /**
759   Substitute all expressions in the WHERE condition and ORDER/GROUP lists
760   that match generated columns (GC) expressions with GC fields, if any.
761 
762   @details This function does 3 things:
763   1) Creates list of all GC fields that are a part of a key and the GC
764     expression is a function. All query tables are scanned. If there's no
765     such fields, function exits.
766   2) By means of Item::compile() WHERE clause is transformed.
767     @see Item_func::gc_subst_transformer() for details.
768   3) If there's ORDER/GROUP BY clauses, this function tries to substitute
769     expressions in these lists with GC too. It removes from the list of
770     indexed GC all elements which index blocked by hints. This is done to
771     reduce amount of further work. Next it goes through ORDER/GROUP BY list
772     and matches the expression in it against GC expressions in indexed GC
773     list. When a match is found, the expression is replaced with a new
774     Item_field for the matched GC field. Also, this new field is added to
775     the hidden part of all_fields list.
776 
777   @param thd         thread handle
778   @param select_lex  the current select
779   @param where_cond  the WHERE condition, possibly NULL
780   @param group_list  the GROUP BY clause, possibly NULL
781   @param order       the ORDER BY clause, possibly NULL
782 
783   @return true if the GROUP BY clause or the ORDER BY clause was
784           changed, false otherwise
785 */
786 
substitute_gc(THD * thd,SELECT_LEX * select_lex,Item * where_cond,ORDER * group_list,ORDER * order)787 bool substitute_gc(THD *thd, SELECT_LEX *select_lex, Item *where_cond,
788                    ORDER *group_list, ORDER *order)
789 {
790   List<Field> indexed_gc;
791   Opt_trace_context * const trace= &thd->opt_trace;
792   Opt_trace_object trace_wrapper(trace);
793   Opt_trace_object subst_gc(trace, "substitute_generated_columns");
794 
795   // Collect all GCs that are a part of a key
796   for (TABLE_LIST *tl= select_lex->leaf_tables;
797        tl;
798        tl= tl->next_leaf)
799   {
800     if (tl->table->s->keys == 0)
801       continue;
802     for (uint i= 0; i < tl->table->s->fields; i++)
803     {
804       Field *fld= tl->table->field[i];
805       if (fld->is_gcol() && !fld->part_of_key.is_clear_all() &&
806           fld->gcol_info->expr_item->can_be_substituted_for_gc())
807       {
808         // Don't check allowed keys here as conditions/group/order use
809         // different keymaps for that.
810         indexed_gc.push_back(fld);
811       }
812     }
813   }
814   // No GC in the tables used in the query
815   if (indexed_gc.elements == 0)
816     return false;
817 
818   if (where_cond)
819   {
820     // Item_func::compile will dereference this pointer, provide valid value.
821     uchar i, *dummy= &i;
822     where_cond->compile(&Item::gc_subst_analyzer, &dummy,
823                         &Item::gc_subst_transformer, (uchar*) &indexed_gc);
824     subst_gc.add("resulting_condition", where_cond);
825   }
826 
827   if (!(group_list || order))
828     return false;
829   // Filter out GCs that do not have index usable for GROUP/ORDER
830   Field *gc;
831   List_iterator<Field> li(indexed_gc);
832 
833   while ((gc= li++))
834   {
835     key_map tkm= gc->part_of_key;
836     tkm.intersect(group_list ? gc->table->keys_in_use_for_group_by :
837                   gc->table->keys_in_use_for_order_by);
838     if (tkm.is_clear_all())
839       li.remove();
840   }
841   if (!indexed_gc.elements)
842     return false;
843 
844   // Index could be used for ORDER only if there is no GROUP
845   ORDER *list= group_list ? group_list : order;
846   bool changed= false;
847   for (ORDER *ord= list; ord; ord= ord->next)
848   {
849     li.rewind();
850     if (!(*ord->item)->can_be_substituted_for_gc())
851       continue;
852     while ((gc= li++))
853     {
854       Item_func *tmp= pointer_cast<Item_func*>(*ord->item);
855       Item_field *field;
856       if ((field= get_gc_for_expr(&tmp, gc, gc->result_type())))
857       {
858 
859         changed= true;
860         /* Add new field to field list. */
861         ord->item= select_lex->add_hidden_item(field);
862         break;
863       }
864     }
865   }
866   if (changed && trace->is_started())
867   {
868     String str;
869     st_select_lex::print_order(&str, list,
870                                enum_query_type(QT_TO_SYSTEM_CHARSET |
871                                                QT_SHOW_SELECT_NUMBER |
872                                                QT_NO_DEFAULT_DB));
873     subst_gc.add_utf8(group_list ? "resulting_GROUP_BY" :
874                       "resulting_ORDER_BY",
875                       str.ptr(), str.length());
876   }
877   return changed;
878 }
879 
880 
881 /**
882    Sets the plan's state of the JOIN. This is always the final step of
883    optimization; starting from this call, we expose the plan to other
884    connections (via EXPLAIN CONNECTION) so the plan has to be final.
885    QEP_TAB's quick_optim, condition_optim and keyread_optim are set here.
886 */
set_plan_state(enum_plan_state plan_state_arg)887 void JOIN::set_plan_state(enum_plan_state plan_state_arg)
888 {
889   // A plan should not change to another plan:
890   assert(plan_state_arg == NO_PLAN || plan_state == NO_PLAN);
891   if (plan_state == NO_PLAN && plan_state_arg != NO_PLAN)
892   {
893     if (qep_tab != NULL)
894     {
895       /*
896         We want to cover primary tables, tmp tables (they may have a sort, so
897         their "quick" and "condition" may change when execution runs the
898         sort), and sj-mat inner tables. Note that make_tmp_tables_info() may
899         have added a sort to the first non-const primary table, so it's
900         important to do those assignments after make_tmp_tables_info().
901       */
902       for (uint i= const_tables; i < tables; ++i)
903       {
904         qep_tab[i].set_quick_optim();
905         qep_tab[i].set_condition_optim();
906         qep_tab[i].set_keyread_optim();
907       }
908     }
909   }
910 
911   DEBUG_SYNC(thd, "before_set_plan");
912 
913   // If SQLCOM_END, no thread is explaining our statement anymore.
914   const bool need_lock= thd->query_plan.get_command() != SQLCOM_END;
915 
916   if (need_lock)
917     thd->lock_query_plan();
918   plan_state= plan_state_arg;
919   if (need_lock)
920     thd->unlock_query_plan();
921 }
922 
923 
alloc_qep(uint n)924 bool JOIN::alloc_qep(uint n)
925 {
926   // Just to be sure that type plan_idx is wide enough:
927   compile_time_assert(MAX_TABLES <= INT_MAX8);
928 
929   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
930 
931   qep_tab= new(thd->mem_root) QEP_TAB[n];
932   if (!qep_tab)
933     return true;                                /* purecov: inspected */
934   for (uint i= 0; i < n; ++i)
935     qep_tab[i].init(best_ref[i]);
936   return false;
937 }
938 
939 
init(JOIN_TAB * jt)940 void QEP_TAB::init(JOIN_TAB *jt)
941 {
942   jt->share_qs(this);
943   set_table(table()); // to update table()->reginfo.qep_tab
944   table_ref= jt->table_ref;
945 }
946 
947 
948 /// @returns semijoin strategy for this table.
get_sj_strategy() const949 uint QEP_TAB::get_sj_strategy() const
950 {
951   if (first_sj_inner() == NO_PLAN_IDX)
952     return SJ_OPT_NONE;
953   const uint s= join()->qep_tab[first_sj_inner()].position()->sj_strategy;
954   assert(s != SJ_OPT_NONE);
955   return s;
956 }
957 
958 /**
959   Return the index used for a table in a QEP
960 
961   The various access methods have different places where the index/key
962   number is stored, so this function is needed to return the correct value.
963 
964   @returns index number, or MAX_KEY if not applicable.
965 
966   JT_SYSTEM and JT_ALL does not use an index, and will always return MAX_KEY.
967 
968   JT_INDEX_MERGE supports more than one index. Hence MAX_KEY is returned and
969   a further inspection is needed.
970 */
effective_index() const971 uint QEP_TAB::effective_index() const
972 {
973   switch (type())
974   {
975   case JT_SYSTEM:
976     assert(ref().key == -1);
977     return MAX_KEY;
978 
979   case JT_CONST:
980   case JT_EQ_REF:
981   case JT_REF_OR_NULL:
982   case JT_REF:
983     assert(ref().key != -1);
984     return uint(ref().key);
985 
986   case JT_INDEX_SCAN:
987   case JT_FT:
988     return index();
989 
990   case JT_INDEX_MERGE:
991     assert(quick()->index == MAX_KEY);
992     return MAX_KEY;
993 
994   case JT_RANGE:
995     return quick()->index;
996 
997   case JT_ALL:
998   default:
999     // @todo Check why JT_UNKNOWN is a valid value here.
1000     assert(type() == JT_ALL || type() == JT_UNKNOWN);
1001     return MAX_KEY;
1002   }
1003 }
1004 
get_sj_strategy() const1005 uint JOIN_TAB::get_sj_strategy() const
1006 {
1007   if (first_sj_inner() == NO_PLAN_IDX)
1008     return SJ_OPT_NONE;
1009   ASSERT_BEST_REF_IN_JOIN_ORDER(join());
1010   JOIN_TAB *tab= join()->best_ref[first_sj_inner()];
1011   uint s= tab->position()->sj_strategy;
1012   assert(s != SJ_OPT_NONE);
1013   return s;
1014 }
1015 
1016 
replace_index_subquery()1017 int JOIN::replace_index_subquery()
1018 {
1019   DBUG_ENTER("replace_index_subquery");
1020   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
1021 
1022   if (group_list ||
1023       !(unit->item && unit->item->substype() == Item_subselect::IN_SUBS) ||
1024       primary_tables != 1 || !where_cond ||
1025       unit->is_union())
1026     DBUG_RETURN(0);
1027 
1028   // Guaranteed by remove_redundant_subquery_clauses():
1029   assert(order == NULL && !select_distinct);
1030 
1031   subselect_engine *engine= NULL;
1032   Item_in_subselect * const in_subs=
1033     static_cast<Item_in_subselect *>(unit->item);
1034   enum join_type type= JT_UNKNOWN;
1035 
1036   JOIN_TAB *const first_join_tab= best_ref[0];
1037 
1038   if (in_subs->exec_method == Item_exists_subselect::EXEC_MATERIALIZATION)
1039   {
1040     // We cannot have two engines at the same time
1041   }
1042   else if (having_cond == NULL)
1043   {
1044     if (first_join_tab->type() == JT_EQ_REF &&
1045         first_join_tab->ref().items[0]->item_name.ptr() == in_left_expr_name)
1046     {
1047       type= JT_UNIQUE_SUBQUERY;
1048       /*
1049         This uses test_if_ref(), which needs access to JOIN_TAB::join_cond() so
1050         it must be done before we get rid of JOIN_TAB.
1051       */
1052       remove_subq_pushed_predicates();
1053     }
1054     else if (first_join_tab->type() == JT_REF &&
1055              first_join_tab->ref().items[0]->item_name.ptr() == in_left_expr_name)
1056     {
1057       type= JT_INDEX_SUBQUERY;
1058       remove_subq_pushed_predicates();
1059     }
1060   }
1061   else if (first_join_tab->type() == JT_REF_OR_NULL &&
1062            first_join_tab->ref().items[0]->item_name.ptr() == in_left_expr_name &&
1063            having_cond->item_name.ptr() == in_having_cond)
1064   {
1065     type= JT_INDEX_SUBQUERY;
1066     where_cond= remove_additional_cond(where_cond);
1067   }
1068 
1069   if (type == JT_UNKNOWN)
1070     DBUG_RETURN(0);
1071 
1072   if (alloc_qep(tables))
1073     DBUG_RETURN(-1);                            /* purecov: inspected */
1074   unplug_join_tabs();
1075 
1076   error= 0;
1077   QEP_TAB *const first_qep_tab= &qep_tab[0];
1078 
1079   if (first_qep_tab->table()->covering_keys.is_set(first_qep_tab->ref().key))
1080   {
1081     assert(!first_qep_tab->table()->no_keyread);
1082     first_qep_tab->table()->set_keyread(true);
1083   }
1084   // execution uses where_cond:
1085   first_qep_tab->set_condition(where_cond);
1086 
1087   engine=
1088     new subselect_indexsubquery_engine(thd, first_qep_tab, unit->item,
1089                                        where_cond,
1090                                        having_cond,
1091                                        // check_null
1092                                        first_qep_tab->type() == JT_REF_OR_NULL,
1093                                        // unique
1094                                        type == JT_UNIQUE_SUBQUERY);
1095   /**
1096      @todo If having_cond!=NULL we pass unique=false. But for this query:
1097      (oe1, oe2) IN (SELECT primary_key, non_key_maybe_null_field FROM tbl)
1098      we could use "unique=true" for the first index component and let
1099      Item_is_not_null_test(non_key_maybe_null_field) handle the second.
1100   */
1101 
1102   first_qep_tab->set_type(type);
1103 
1104   if (!unit->item->change_engine(engine))
1105     DBUG_RETURN(1);
1106   else // error:
1107     DBUG_RETURN(-1);                            /* purecov: inspected */
1108 }
1109 
1110 
optimize_distinct_group_order()1111 bool JOIN::optimize_distinct_group_order()
1112 {
1113   DBUG_ENTER("optimize_distinct_group_order");
1114   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
1115 
1116   /* Optimize distinct away if possible */
1117   {
1118     ORDER *org_order= order;
1119     order= ORDER_with_src(remove_const(order, where_cond, 1, &simple_order,
1120                                        "ORDER BY"),
1121                           order.src);
1122     if (thd->is_error())
1123     {
1124       error= 1;
1125       DBUG_PRINT("error",("Error from remove_const"));
1126       DBUG_RETURN(true);
1127     }
1128 
1129     /*
1130       If we are using ORDER BY NULL or ORDER BY const_expression,
1131       return result in any order (even if we are using a GROUP BY)
1132     */
1133     if (!order && org_order)
1134       skip_sort_order= 1;
1135   }
1136   /*
1137      Check if we can optimize away GROUP BY/DISTINCT.
1138      We can do that if there are no aggregate functions, the
1139      fields in DISTINCT clause (if present) and/or columns in GROUP BY
1140      (if present) contain direct references to all key parts of
1141      an unique index (in whatever order) and if the key parts of the
1142      unique index cannot contain NULLs.
1143      Note that the unique keys for DISTINCT and GROUP BY should not
1144      be the same (as long as they are unique).
1145 
1146      The FROM clause must contain a single non-constant table.
1147 
1148      @todo Apart from the LIS test, every condition depends only on facts
1149      which can be known in SELECT_LEX::prepare(), possibly this block should
1150      move there.
1151   */
1152 
1153   JOIN_TAB *const tab= best_ref[const_tables];
1154 
1155   if (plan_is_single_table() &&
1156       (group_list || select_distinct) &&
1157       !tmp_table_param.sum_func_count &&
1158       (!tab->quick() ||
1159        tab->quick()->get_type() !=
1160        QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX))
1161   {
1162     if (group_list && rollup.state == ROLLUP::STATE_NONE &&
1163        list_contains_unique_index(tab,
1164                                  find_field_in_order_list,
1165                                  (void *) group_list))
1166     {
1167       /*
1168         We have found that grouping can be removed since groups correspond to
1169         only one row anyway, but we still have to guarantee correct result
1170         order. The line below effectively rewrites the query from GROUP BY
1171         <fields> to ORDER BY <fields>. There are three exceptions:
1172         - if skip_sort_order is set (see above), then we can simply skip
1173           GROUP BY;
1174         - if IN(subquery), likewise (see remove_redundant_subquery_clauses())
1175         - we can only rewrite ORDER BY if the ORDER BY fields are 'compatible'
1176           with the GROUP BY ones, i.e. either one is a prefix of another.
1177           We only check if the ORDER BY is a prefix of GROUP BY. In this case
1178           test_if_subpart() copies the ASC/DESC attributes from the original
1179           ORDER BY fields.
1180           If GROUP BY is a prefix of ORDER BY, then it is safe to leave
1181           'order' as is.
1182        */
1183       if (!order || test_if_subpart(group_list, order))
1184         order= (skip_sort_order ||
1185                 (unit->item && unit->item->substype() ==
1186                  Item_subselect::IN_SUBS)) ? NULL : group_list;
1187 
1188       /*
1189         If we have an IGNORE INDEX FOR GROUP BY(fields) clause, this must be
1190         rewritten to IGNORE INDEX FOR ORDER BY(fields).
1191       */
1192       best_ref[0]->table()->keys_in_use_for_order_by=
1193         best_ref[0]->table()->keys_in_use_for_group_by;
1194       group_list= 0;
1195       grouped= false;
1196     }
1197     if (select_distinct &&
1198        list_contains_unique_index(tab,
1199                                  find_field_in_item_list,
1200                                  (void *) &fields_list))
1201     {
1202       select_distinct= 0;
1203     }
1204   }
1205   if (!(group_list || tmp_table_param.sum_func_count) &&
1206       select_distinct &&
1207       plan_is_single_table() &&
1208       rollup.state == ROLLUP::STATE_NONE)
1209   {
1210     /*
1211       We are only using one table. In this case we change DISTINCT to a
1212       GROUP BY query if:
1213       - The GROUP BY can be done through indexes (no sort) and the ORDER
1214         BY only uses selected fields.
1215 	(In this case we can later optimize away GROUP BY and ORDER BY)
1216       - We are scanning the whole table without LIMIT
1217         This can happen if:
1218         - We are using CALC_FOUND_ROWS
1219         - We are using an ORDER BY that can't be optimized away.
1220 
1221       We don't want to use this optimization when we are using LIMIT
1222       because in this case we can just create a temporary table that
1223       holds LIMIT rows and stop when this table is full.
1224     */
1225     if (order)
1226     {
1227       skip_sort_order=
1228         test_if_skip_sort_order(tab, order, m_select_limit,
1229                                 true,           // no_changes
1230                                 &tab->table()->keys_in_use_for_order_by,
1231                                 "ORDER BY");
1232       count_field_types(select_lex, &tmp_table_param, all_fields, false, false);
1233     }
1234     ORDER *o;
1235     bool all_order_fields_used;
1236     if ((o= create_distinct_group(thd, ref_ptrs,
1237                                   order, fields_list, all_fields,
1238 				  &all_order_fields_used)))
1239     {
1240       group_list= ORDER_with_src(o, ESC_DISTINCT);
1241       const bool skip_group=
1242         skip_sort_order &&
1243         test_if_skip_sort_order(tab, group_list, m_select_limit,
1244                                 true,         // no_changes
1245                                 &tab->table()->keys_in_use_for_group_by,
1246                                 "GROUP BY");
1247       count_field_types(select_lex, &tmp_table_param, all_fields, false, false);
1248       if ((skip_group && all_order_fields_used) ||
1249 	  m_select_limit == HA_POS_ERROR ||
1250 	  (order && !skip_sort_order))
1251       {
1252 	/*  Change DISTINCT to GROUP BY */
1253 	select_distinct= 0;
1254 	no_order= !order;
1255 	if (all_order_fields_used)
1256 	{
1257 	  if (order && skip_sort_order)
1258 	  {
1259 	    /*
1260 	      Force MySQL to read the table in sorted order to get result in
1261 	      ORDER BY order.
1262 	    */
1263 	    tmp_table_param.quick_group=0;
1264 	  }
1265 	  order=0;
1266         }
1267         grouped= true;                    // For end_write_group
1268       }
1269       else
1270 	group_list= 0;
1271     }
1272     else if (thd->is_fatal_error)         // End of memory
1273       DBUG_RETURN(true);
1274   }
1275   simple_group= 0;
1276   {
1277     ORDER *old_group_list= group_list;
1278     group_list= ORDER_with_src(remove_const(group_list, where_cond,
1279                                             rollup.state == ROLLUP::STATE_NONE,
1280                                             &simple_group, "GROUP BY"),
1281                                group_list.src);
1282 
1283     if (thd->is_error())
1284     {
1285       error= 1;
1286       DBUG_PRINT("error",("Error from remove_const"));
1287       DBUG_RETURN(true);
1288     }
1289     if (old_group_list && !group_list)
1290       select_distinct= 0;
1291   }
1292   if (!group_list && grouped)
1293   {
1294     order=0;					// The output has only one row
1295     simple_order=1;
1296     select_distinct= 0;                       // No need in distinct for 1 row
1297     group_optimized_away= 1;
1298   }
1299 
1300   calc_group_buffer(this, group_list);
1301   send_group_parts= tmp_table_param.group_parts; /* Save org parts */
1302 
1303   if (test_if_subpart(group_list, order) ||
1304       (!group_list && tmp_table_param.sum_func_count))
1305   {
1306     order=0;
1307     if (is_indexed_agg_distinct(this, NULL))
1308       sort_and_group= 0;
1309   }
1310 
1311   DBUG_RETURN(false);
1312 }
1313 
1314 
test_skip_sort()1315 void JOIN::test_skip_sort()
1316 {
1317   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
1318   JOIN_TAB *const tab= best_ref[const_tables];
1319 
1320   assert(ordered_index_usage == ordered_index_void);
1321 
1322   if (group_list)   // GROUP BY honoured first
1323                     // (DISTINCT was rewritten to GROUP BY if skippable)
1324   {
1325     /*
1326       When there is SQL_BIG_RESULT do not sort using index for GROUP BY,
1327       and thus force sorting on disk unless a group min-max optimization
1328       is going to be used as it is applied now only for one table queries
1329       with covering indexes.
1330     */
1331     if (!(select_lex->active_options() & SELECT_BIG_RESULT || with_json_agg) ||
1332         (tab->quick() &&
1333          tab->quick()->get_type() ==
1334            QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX))
1335     {
1336       if (simple_group &&              // GROUP BY is possibly skippable
1337           !select_distinct)            // .. if not preceded by a DISTINCT
1338       {
1339         /*
1340           Calculate a possible 'limit' of table rows for 'GROUP BY':
1341           A specified 'LIMIT' is relative to the final resultset.
1342           'need_tmp' implies that there will be more postprocessing
1343           so the specified 'limit' should not be enforced yet.
1344          */
1345         const ha_rows limit = need_tmp ? HA_POS_ERROR : m_select_limit;
1346 
1347         if (test_if_skip_sort_order(tab, group_list, limit, false,
1348                                     &tab->table()->keys_in_use_for_group_by,
1349                                     "GROUP BY"))
1350         {
1351           ordered_index_usage= ordered_index_group_by;
1352         }
1353       }
1354 
1355       /*
1356         If we are going to use semi-join LooseScan, it will depend
1357         on the selected index scan to be used.  If index is not used
1358         for the GROUP BY, we risk that sorting is put on the LooseScan
1359         table.  In order to avoid this, force use of temporary table.
1360         TODO: Explain the quick_group part of the test below.
1361        */
1362       if ((ordered_index_usage != ordered_index_group_by) &&
1363           (tmp_table_param.quick_group ||
1364            (tab->emb_sj_nest &&
1365             tab->position()->sj_strategy == SJ_OPT_LOOSE_SCAN)))
1366       {
1367         need_tmp= true;
1368         simple_order= simple_group= false; // Force tmp table without sort
1369       }
1370     }
1371   }
1372   else if (order &&                      // ORDER BY wo/ preceding GROUP BY
1373            (simple_order || skip_sort_order)) // which is possibly skippable
1374   {
1375     if (test_if_skip_sort_order(tab, order, m_select_limit, false,
1376                                 &tab->table()->keys_in_use_for_order_by,
1377                                 "ORDER BY"))
1378     {
1379       ordered_index_usage= ordered_index_order_by;
1380     }
1381   }
1382 }
1383 
1384 
1385 /**
1386   Test if ORDER BY is a single MATCH function(ORDER BY MATCH)
1387   and sort order is descending.
1388 
1389   @param order                 pointer to ORDER struct.
1390 
1391   @retval
1392     Pointer to MATCH function if order is 'ORDER BY MATCH() DESC'
1393   @retval
1394     NULL otherwise
1395 */
1396 
test_if_ft_index_order(ORDER * order)1397 static Item_func_match *test_if_ft_index_order(ORDER *order)
1398 {
1399   if (order && order->next == NULL &&
1400       order->direction == ORDER::ORDER_DESC &&
1401       (*order->item)->type() == Item::FUNC_ITEM &&
1402       ((Item_func*) (*order->item))->functype() == Item_func::FT_FUNC)
1403     return static_cast<Item_func_match*> (*order->item)->get_master();
1404 
1405   return NULL;
1406 }
1407 
1408 /**
1409   Test if this is a prefix index.
1410 
1411   @param   table     table
1412   @param   idx       index to check
1413 
1414   @return TRUE if this is a prefix index
1415 */
is_prefix_index(TABLE * table,uint idx)1416 bool is_prefix_index(TABLE* table, uint idx)
1417 {
1418   if (!table || !table->key_info)
1419   {
1420     return false;
1421   }
1422   KEY* key_info = table->key_info;
1423   uint key_parts = key_info[idx].user_defined_key_parts;
1424   KEY_PART_INFO* key_part = key_info[idx].key_part;
1425 
1426   for (uint i = 0; i < key_parts; i++, key_part++)
1427   {
1428     if (key_part->field &&
1429       (key_part->length !=
1430         table->field[key_part->fieldnr - 1]->key_length() &&
1431         !(key_info->flags & (HA_FULLTEXT | HA_SPATIAL))))
1432     {
1433       return true;
1434     }
1435   }
1436   return false;
1437 }
1438 
1439 /**
1440   Test if one can use the key to resolve ordering.
1441 
1442   @param order               Sort order
1443   @param table               Table to sort
1444   @param idx                 Index to check
1445   @param[out] used_key_parts NULL by default, otherwise return value for
1446                              used key parts.
1447 
1448   @note
1449     used_key_parts is set to correct key parts used if return value != 0
1450     (On other cases, used_key_part may be changed)
1451     Note that the value may actually be greater than the number of index
1452     key parts. This can happen for storage engines that have the primary
1453     key parts as a suffix for every secondary key.
1454 
1455   @retval
1456     1   key is ok.
1457   @retval
1458     0   Key can't be used
1459   @retval
1460     -1   Reverse key can be used
1461 */
1462 
test_if_order_by_key(ORDER * order,TABLE * table,uint idx,uint * used_key_parts)1463 int test_if_order_by_key(ORDER *order, TABLE *table, uint idx,
1464                          uint *used_key_parts)
1465 {
1466   KEY_PART_INFO *key_part,*key_part_end;
1467   key_part=table->key_info[idx].key_part;
1468   key_part_end=key_part+table->key_info[idx].user_defined_key_parts;
1469   key_part_map const_key_parts=table->const_key_parts[idx];
1470   int reverse=0;
1471   uint key_parts;
1472   my_bool on_pk_suffix= FALSE;
1473   DBUG_ENTER("test_if_order_by_key");
1474 
1475   for (; order ; order=order->next, const_key_parts>>=1)
1476   {
1477 
1478     /*
1479       Since only fields can be indexed, ORDER BY <something> that is
1480       not a field cannot be resolved by using an index.
1481     */
1482     Item *real_itm= (*order->item)->real_item();
1483     if (real_itm->type() != Item::FIELD_ITEM)
1484       DBUG_RETURN(0);
1485 
1486     Field *field= static_cast<Item_field*>(real_itm)->field;
1487     int flag;
1488 
1489     /*
1490       Skip key parts that are constants in the WHERE clause.
1491       These are already skipped in the ORDER BY by const_expression_in_where()
1492     */
1493     for (; const_key_parts & 1 && key_part < key_part_end ;
1494          const_key_parts>>= 1)
1495       key_part++;
1496 
1497     /* Avoid usage of prefix index for sorting a partition table */
1498     if (table->part_info && key_part != table->key_info[idx].key_part &&
1499         key_part != key_part_end && is_prefix_index(table, idx))
1500      DBUG_RETURN(0);
1501 
1502     if (key_part == key_part_end)
1503     {
1504       /*
1505         We are at the end of the key. Check if the engine has the primary
1506         key as a suffix to the secondary keys. If it has continue to check
1507         the primary key as a suffix.
1508       */
1509       if (!on_pk_suffix &&
1510           (table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX) &&
1511           table->s->primary_key != MAX_KEY &&
1512           table->s->primary_key != idx)
1513       {
1514         on_pk_suffix= TRUE;
1515         key_part= table->key_info[table->s->primary_key].key_part;
1516         key_part_end=key_part +
1517           table->key_info[table->s->primary_key].user_defined_key_parts;
1518         const_key_parts=table->const_key_parts[table->s->primary_key];
1519 
1520         for (; const_key_parts & 1 ; const_key_parts>>= 1)
1521           key_part++;
1522         /*
1523          The primary and secondary key parts were all const (i.e. there's
1524          one row).  The sorting doesn't matter.
1525         */
1526         if (key_part == key_part_end && reverse == 0)
1527         {
1528           key_parts= 0;
1529           reverse= 1;
1530           goto ok;
1531         }
1532       }
1533       else
1534         DBUG_RETURN(0);
1535     }
1536 
1537     if (key_part->field != field || !field->part_of_sortkey.is_set(idx))
1538       DBUG_RETURN(0);
1539 
1540     const ORDER::enum_order keypart_order=
1541       (key_part->key_part_flag & HA_REVERSE_SORT) ?
1542       ORDER::ORDER_DESC : ORDER::ORDER_ASC;
1543     /* set flag to 1 if we can use read-next on key, else to -1 */
1544     flag= (order->direction == keypart_order) ? 1 : -1;
1545     if (reverse && flag != reverse)
1546       DBUG_RETURN(0);
1547     reverse=flag;				// Remember if reverse
1548     key_part++;
1549   }
1550   if (on_pk_suffix)
1551   {
1552     uint used_key_parts_secondary= table->key_info[idx].user_defined_key_parts;
1553     uint used_key_parts_pk=
1554       (uint) (key_part - table->key_info[table->s->primary_key].key_part);
1555     key_parts= used_key_parts_pk + used_key_parts_secondary;
1556 
1557     if (reverse == -1 &&
1558         (!(table->file->index_flags(idx, used_key_parts_secondary - 1, 1) &
1559            HA_READ_PREV) ||
1560          !(table->file->index_flags(table->s->primary_key,
1561                                     used_key_parts_pk - 1, 1) & HA_READ_PREV)))
1562       reverse= 0;                               // Index can't be used
1563   }
1564   else
1565   {
1566     key_parts= (uint) (key_part - table->key_info[idx].key_part);
1567     if (reverse == -1 &&
1568         !(table->file->index_flags(idx, key_parts-1, 1) & HA_READ_PREV))
1569       reverse= 0;                               // Index can't be used
1570   }
1571 ok:
1572   if (used_key_parts != NULL)
1573     *used_key_parts= key_parts;
1574   DBUG_RETURN(reverse);
1575 }
1576 
1577 
1578 /**
1579   Find shortest key suitable for full table scan.
1580 
1581   @param table                 Table to scan
1582   @param usable_keys           Allowed keys
1583 
1584   @note
1585      As far as
1586      1) clustered primary key entry data set is a set of all record
1587         fields (key fields and not key fields) and
1588      2) secondary index entry data is a union of its key fields and
1589         primary key fields (at least InnoDB and its derivatives don't
1590         duplicate primary key fields there, even if the primary and
1591         the secondary keys have a common subset of key fields),
1592      then secondary index entry data is always a subset of primary key entry.
1593      Unfortunately, key_info[nr].key_length doesn't show the length
1594      of key/pointer pair but a sum of key field lengths only, thus
1595      we can't estimate index IO volume comparing only this key_length
1596      value of secondary keys and clustered PK.
1597      So, try secondary keys first, and choose PK only if there are no
1598      usable secondary covering keys or found best secondary key include
1599      all table fields (i.e. same as PK):
1600 
1601   @return
1602     MAX_KEY     no suitable key found
1603     key index   otherwise
1604 */
1605 
find_shortest_key(TABLE * table,const key_map * usable_keys)1606 uint find_shortest_key(TABLE *table, const key_map *usable_keys)
1607 {
1608   uint best= MAX_KEY;
1609   uint usable_clustered_pk= (table->file->primary_key_is_clustered() &&
1610                              table->s->primary_key != MAX_KEY &&
1611                              usable_keys->is_set(table->s->primary_key)) ?
1612                             table->s->primary_key : MAX_KEY;
1613   if (!usable_keys->is_clear_all())
1614   {
1615     uint min_length= (uint) ~0;
1616     for (uint nr=0; nr < table->s->keys ; nr++)
1617     {
1618       if (nr == usable_clustered_pk)
1619         continue;
1620       if (usable_keys->is_set(nr))
1621       {
1622         /*
1623           Can not do full index scan on rtree index because it is not
1624           supported by Innodb, probably not supported by others either.
1625          */
1626         const KEY &key_ref= table->key_info[nr];
1627         if (key_ref.key_length < min_length &&
1628             !(key_ref.flags & HA_SPATIAL))
1629         {
1630           min_length=key_ref.key_length;
1631           best=nr;
1632         }
1633       }
1634     }
1635   }
1636   if (usable_clustered_pk != MAX_KEY)
1637   {
1638     /*
1639      If the primary key is clustered and found shorter key covers all table
1640      fields then primary key scan normally would be faster because amount of
1641      data to scan is the same but PK is clustered.
1642      It's safe to compare key parts with table fields since duplicate key
1643      parts aren't allowed.
1644      */
1645     if (best == MAX_KEY ||
1646         table->key_info[best].user_defined_key_parts >= table->s->fields)
1647       best= usable_clustered_pk;
1648   }
1649   return best;
1650 }
1651 
1652 /**
1653   Test if a second key is the subkey of the first one.
1654 
1655   @param key_part              First key parts
1656   @param ref_key_part          Second key parts
1657   @param ref_key_part_end      Last+1 part of the second key
1658 
1659   @note
1660     Second key MUST be shorter than the first one.
1661 
1662   @retval
1663     1	is a subkey
1664   @retval
1665     0	no sub key
1666 */
1667 
1668 inline bool
is_subkey(KEY_PART_INFO * key_part,KEY_PART_INFO * ref_key_part,KEY_PART_INFO * ref_key_part_end)1669 is_subkey(KEY_PART_INFO *key_part, KEY_PART_INFO *ref_key_part,
1670 	  KEY_PART_INFO *ref_key_part_end)
1671 {
1672   for (; ref_key_part < ref_key_part_end; key_part++, ref_key_part++)
1673     if (!key_part->field->eq(ref_key_part->field))
1674       return 0;
1675   return 1;
1676 }
1677 
1678 
1679 /**
1680   Test if REF_OR_NULL optimization will be used if the specified
1681   ref_key is used for REF-access to 'tab'
1682 
1683   @retval
1684     true	JT_REF_OR_NULL will be used
1685   @retval
1686     false	no JT_REF_OR_NULL access
1687 */
1688 
1689 static bool
is_ref_or_null_optimized(const JOIN_TAB * tab,uint ref_key)1690 is_ref_or_null_optimized(const JOIN_TAB *tab, uint ref_key)
1691 {
1692   if (tab->keyuse())
1693   {
1694     const Key_use *keyuse= tab->keyuse();
1695     while (keyuse->key != ref_key && keyuse->table_ref == tab->table_ref)
1696       keyuse++;
1697 
1698     const table_map const_tables= tab->join()->const_table_map;
1699     while (keyuse->key == ref_key && keyuse->table_ref == tab->table_ref)
1700     {
1701       if (!(keyuse->used_tables & ~const_tables))
1702       {
1703         if (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)
1704           return true;
1705       }
1706       keyuse++;
1707     }
1708   }
1709   return false;
1710 }
1711 
1712 
1713 /**
1714   Test if we can use one of the 'usable_keys' instead of 'ref' key
1715   for sorting.
1716 
1717   @param ref			Number of key, used for WHERE clause
1718   @param usable_keys		Keys for testing
1719 
1720   @return
1721     - MAX_KEY			If we can't use other key
1722     - the number of found key	Otherwise
1723 */
1724 
1725 static uint
test_if_subkey(ORDER * order,JOIN_TAB * tab,uint ref,uint ref_key_parts,const key_map * usable_keys)1726 test_if_subkey(ORDER *order, JOIN_TAB *tab, uint ref, uint ref_key_parts,
1727 	       const key_map *usable_keys)
1728 {
1729   uint nr;
1730   uint min_length= (uint) ~0;
1731   uint best= MAX_KEY;
1732   TABLE *table= tab->table();
1733   KEY_PART_INFO *ref_key_part= table->key_info[ref].key_part;
1734   KEY_PART_INFO *ref_key_part_end= ref_key_part + ref_key_parts;
1735 
1736   for (nr= 0 ; nr < table->s->keys ; nr++)
1737   {
1738     if (usable_keys->is_set(nr) &&
1739 	table->key_info[nr].key_length < min_length &&
1740 	table->key_info[nr].user_defined_key_parts >= ref_key_parts &&
1741 	is_subkey(table->key_info[nr].key_part, ref_key_part,
1742 		  ref_key_part_end) &&
1743         !is_ref_or_null_optimized(tab, nr) &&
1744 	test_if_order_by_key(order, table, nr))
1745     {
1746       min_length= table->key_info[nr].key_length;
1747       best= nr;
1748     }
1749   }
1750   return best;
1751 }
1752 
1753 
1754 /**
1755   It is not obvious to see that test_if_skip_sort_order() never changes the
1756   plan if no_changes is true. So we double-check: creating an instance of this
1757   class saves some important access-path-related information of the current
1758   table; when the instance is destroyed, the latest access-path information is
1759   compared with saved data.
1760 */
1761 
1762 class Plan_change_watchdog
1763 {
1764 #ifndef NDEBUG
1765 public:
1766   /**
1767     @param tab_arg     table whose access path is being determined
1768     @param no_changes  whether a change to the access path is allowed
1769   */
Plan_change_watchdog(const JOIN_TAB * tab_arg,const bool no_changes_arg)1770   Plan_change_watchdog(const JOIN_TAB *tab_arg, const bool no_changes_arg)
1771   {
1772     // Only to keep gcc 4.1.2-44 silent about uninitialized variables
1773     quick= NULL;
1774     quick_index= 0;
1775     if (no_changes_arg)
1776     {
1777       tab= tab_arg;
1778       type= tab->type();
1779       if ((quick= tab->quick()))
1780         quick_index= quick->index;
1781       use_quick= tab->use_quick;
1782       ref_key= tab->ref().key;
1783       ref_key_parts= tab->ref().key_parts;
1784       index= tab->index();
1785     }
1786     else
1787     {
1788       tab= NULL;
1789       // Only to keep gcc 4.1.2-44 silent about uninitialized variables
1790       type= JT_UNKNOWN;
1791       quick= NULL;
1792       ref_key= ref_key_parts= index= 0;
1793       use_quick= QS_NONE;
1794     }
1795   }
~Plan_change_watchdog()1796   ~Plan_change_watchdog()
1797   {
1798     if (tab == NULL)
1799       return;
1800     // changes are not allowed, we verify:
1801     assert(tab->type() == type);
1802     assert(tab->quick() == quick);
1803     assert((quick == NULL) || tab->quick()->index == quick_index);
1804     assert(tab->use_quick == use_quick);
1805     assert(tab->ref().key == ref_key);
1806     assert(tab->ref().key_parts == ref_key_parts);
1807     assert(tab->index() == index);
1808   }
1809 private:
1810   const JOIN_TAB *tab;            ///< table, or NULL if changes are allowed
1811   enum join_type type;            ///< copy of tab->type()
1812   // "Range / index merge" info
1813   const QUICK_SELECT_I *quick;    ///< copy of tab->select->quick
1814   uint quick_index;               ///< copy of tab->select->quick->index
1815   enum quick_type use_quick;      ///< copy of tab->use_quick
1816   // "ref access" info
1817   int ref_key;                    ///< copy of tab->ref().key
1818   uint ref_key_parts;/// copy of tab->ref().key_parts
1819   // Other index-related info
1820   uint index;                     ///< copy of tab->index
1821 #else // in non-debug build, empty class
1822 public:
1823   Plan_change_watchdog(const JOIN_TAB *tab_arg, const bool no_changes_arg) {}
1824 #endif
1825 };
1826 
1827 
1828 /**
1829   Test if we can skip ordering by using an index.
1830 
1831   If the current plan is to use an index that provides ordering, the
1832   plan will not be changed. Otherwise, if an index can be used, the
1833   JOIN_TAB (access type, ref, index and quick select) is changed to use
  that index, unless no_changes is true.
1834 
1835   The index must cover all fields in <order>, or it will not be considered.
1836 
1837   @param tab           JOIN_TAB of the accessed table. It is dereferenced
                       unconditionally, so it must not be NULL.
1838   @param order         Linked list of ORDER BY arguments
1839   @param select_limit  LIMIT value, or HA_POS_ERROR if no limit
1840   @param no_changes    No changes will be made to the query plan.
1841   @param map           key_map of applicable indexes. It is copied into a
                       local set which is narrowed while <order> is examined.
1842   @param clause_type   "ORDER BY" etc for printing in optimizer trace
1843 
1844   @todo
1845     - sergeyp: Results of all index merge selects actually are ordered
1846     by clustered PK values.
1847 
1848   @note
1849   This function may change tmp_table_param.precomputed_group_by. This
1850   affects how create_tmp_table() treats aggregation functions, so
1851   count_field_types() must be called again to make sure this is taken
1852   into consideration.

  @note
  Once the optimizer trace objects have been created, every exit goes
  through label fix_ICP, which deletes or restores the quick select and
  writes the "index_order_summary" trace object. The early returns before
  that point do neither.
1853 
1854   @retval
1855     0    We have to use filesort to do the sorting
1856   @retval
1857     1    We can use an index.
1858 */
1859 
1860 static bool
test_if_skip_sort_order(JOIN_TAB * tab,ORDER * order,ha_rows select_limit,const bool no_changes,const key_map * map,const char * clause_type)1861 test_if_skip_sort_order(JOIN_TAB *tab, ORDER *order, ha_rows select_limit,
1862                         const bool no_changes, const key_map *map,
1863                         const char *clause_type)
1864 {
1865   int ref_key;                    // index of the current access path, -1 if none
1866   uint ref_key_parts= 0;          // number of parts of ref_key in use
1867   int order_direction= 0;         // 0: undetermined, -1: DESC (see trace output)
1868   uint used_key_parts;            // key parts used for ordering, for make_reverse()
1869   TABLE *const table= tab->table();
1870   JOIN *const join= tab->join();
1871   THD *const thd= join->thd;
  // Quick select on entry; at fix_ICP it is either restored or deleted.
1872   QUICK_SELECT_I *const save_quick= tab->quick();
1873   int best_key= -1;               // set by test_if_cheaper_ordering(), -1 if none
1874   bool set_up_ref_access_to_key= false;
1875   bool can_skip_sorting= false;                  // used as return value
1876   int changed_key= -1;            // index we switch to, -1 if the index is kept
1877   DBUG_ENTER("test_if_skip_sort_order");
1878 
1879   /* Check that we are always called with first non-const table */
1880   assert((uint)tab->idx() == join->const_tables);
1881 
  // In debug builds: asserts on scope exit that the plan is unchanged
  // when no_changes is true.
1882   Plan_change_watchdog watchdog(tab, no_changes);
1883 
1884   /* Sorting a single row can always be skipped */
1885   if (tab->type() == JT_EQ_REF ||
1886       tab->type() == JT_CONST  ||
1887       tab->type() == JT_SYSTEM)
1888   {
1889     DBUG_RETURN(1);
1890   }
1891 
1892   /*
1893     Check if FT index can be used to retrieve result in the required order.
1894     It is possible if ordering is on the first non-constant table.
1895   */
1896   if (join->order && join->simple_order)
1897   {
1898     /*
1899       Check if ORDER is DESC, ORDER BY is a single MATCH function.
1900     */
1901     Item_func_match *ft_func= test_if_ft_index_order(order);
1902     /*
1903       Two possible cases when we can skip sort order:
1904       1. FT_SORTED must be set(Natural mode, no ORDER BY).
1905       2. If FT_SORTED flag is not set then
1906       the engine should support deferred sorting. Deferred sorting means
1907       that sorting is postponed until the start of index reading(InnoDB).
1908       In this case we set FT_SORTED flag here to let the engine know that
1909       internal sorting is needed.
1910     */
1911     if (ft_func && ft_func->ft_handler && ft_func->ordered_result())
1912     {
1913       /*
1914         FT index scan is used, so the only additional requirement is
1915         that ORDER BY MATCH function is the same as the function that
1916         is used for FT index.
1917       */
1918       if (tab->type() == JT_FT &&
1919           ft_func->eq(tab->position()->key->val, true))
1920       {
1921         ft_func->set_hints(join, FT_SORTED, select_limit, false);
1922         DBUG_RETURN(true);
1923       }
1924       /*
1925         No index is used, it's possible to use FT index for ORDER BY if
1926         LIMIT is present and does not exceed count of the records in FT index
1927         and there is no WHERE condition since a condition may potentially
1928         require more rows to be fetched from FT index.
1929       */
1930       else if (!tab->condition() &&
1931                select_limit != HA_POS_ERROR &&
1932                select_limit <= ft_func->get_count())
1933       {
1934         /* test_if_ft_index_order() always returns master MATCH function. */
1935         assert(!ft_func->master);
1936         /* ref is not set since there is no WHERE condition */
1937         assert(tab->ref().key == -1);
1938 
1939         /*Make EXPLAIN happy */
1940         tab->set_type(JT_FT);
1941         tab->ref().key= ft_func->key;
1942         tab->ref().key_parts= 0;
1943         tab->set_index(ft_func->key);
1944         tab->set_ft_func(ft_func);
1945 
1946         /* Setup FT handler */
1947         ft_func->set_hints(join, FT_SORTED, select_limit, true);
1948         ft_func->join_key= true;
1949         table->file->ft_handler= ft_func->ft_handler;
1950         DBUG_RETURN(true);
1951       }
1952     }
1953   }
1954 
1955   /*
1956     Keys disabled by ALTER TABLE ... DISABLE KEYS should have already
1957     been taken into account.
1958   */
1959   key_map usable_keys= *map;
1960 
  /*
    Every element of <order> must resolve to a plain column (FIELD_ITEM).
    usable_keys is intersected with the part_of_sortkey map of each such
    column; as soon as no key is left, sorting cannot be skipped.
  */
1961   for (ORDER *tmp_order=order; tmp_order ; tmp_order=tmp_order->next)
1962   {
1963     Item *item= (*tmp_order->item)->real_item();
1964     if (item->type() != Item::FIELD_ITEM)
1965     {
1966       usable_keys.clear_all();
1967       DBUG_RETURN(0);
1968     }
1969     usable_keys.intersect(((Item_field*) item)->field->part_of_sortkey);
1970     if (usable_keys.is_clear_all())
1971       DBUG_RETURN(0);					// No usable keys
1972   }
  // ref_or_null and fulltext access are never considered for ordering here.
1973   if (tab->type() == JT_REF_OR_NULL || tab->type() == JT_FT)
1974     DBUG_RETURN(0);
1975 
  // Determine which index (if any) the current access path reads.
1976   ref_key= -1;
1977   /* Test if constant range in WHERE */
1978   if (tab->type() == JT_REF)
1979   {
1980     assert(tab->ref().key >= 0 && tab->ref().key_parts);
1981     ref_key=	   tab->ref().key;
1982     ref_key_parts= tab->ref().key_parts;
1983   }
1984   else if (tab->type() == JT_RANGE || tab->type() == JT_INDEX_MERGE)
1985   {
1986     // Range found by opt_range
1987     int quick_type= tab->quick()->get_type();
1988     /*
1989       assume results are not ordered when index merge is used
1990       TODO: sergeyp: Results of all index merge selects actually are ordered
1991       by clustered PK values.
1992     */
1993 
1994     if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE ||
1995         quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
1996         quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT)
1997       DBUG_RETURN(0);
1998     ref_key=	   tab->quick()->index;
1999     ref_key_parts= tab->quick()->used_key_parts;
2000   }
2001   else if (tab->type() == JT_INDEX_SCAN)
2002   {
2003     // The optimizer has decided to use an index scan.
2004     ref_key=       tab->index();
2005     ref_key_parts= actual_key_parts(&table->key_info[tab->index()]);
2006   }
2007 
  // From here on all exits go through fix_ICP (no more DBUG_RETURN before it).
2008   Opt_trace_context * const trace= &thd->opt_trace;
2009   Opt_trace_object trace_wrapper(trace);
2010   Opt_trace_object
2011     trace_skip_sort_order(trace, "reconsidering_access_paths_for_index_ordering");
2012   trace_skip_sort_order.add_alnum("clause", clause_type);
2013   Opt_trace_array trace_steps(trace, "steps");
2014 
2015   if (ref_key >= 0)
2016   {
2017     /*
2018       We come here when ref/index scan/range scan access has been set
2019       up for this table. Do not change access method if ordering is
2020       provided already.
2021     */
2022     if (!usable_keys.is_set(ref_key))
2023     {
2024       /*
2025         We come here when ref_key is not among usable_keys, try to find a
2026         usable prefix key of that key.
2027       */
2028       uint new_ref_key;
2029       /*
2030 	If using index only read, only consider other possible index only
2031 	keys
2032       */
2033       if (table->covering_keys.is_set(ref_key))
2034 	usable_keys.intersect(table->covering_keys);
2035 
2036       if ((new_ref_key= test_if_subkey(order, tab, ref_key, ref_key_parts,
2037 				       &usable_keys)) < MAX_KEY)
2038       {
2039 	/* Found key that can be used to retrieve data in sorted order */
2040 	if (tab->ref().key >= 0)
2041         {
2042           /*
2043             We'll use ref access method on key new_ref_key. The actual change
2044             is done further down in this function where we update the plan.
2045           */
2046           set_up_ref_access_to_key= true;
2047         }
2048 	else if (!no_changes)
2049 	{
2050           /*
2051             The range optimizer constructed QUICK_RANGE for ref_key, and
2052             we want to use instead new_ref_key as the index. We can't
2053             just change the index of the quick select, because this may
2054             result in an inconsistent QUICK_SELECT object. Below we
2055             create a new QUICK_SELECT from scratch so that all its
2056             parameters are set correctly by the range optimizer.
2057 
2058             Note that the range optimizer is NOT called if
2059             no_changes==true. This reason is that the range optimizer
2060             cannot find a QUICK that can return ordered result unless
2061             index access (ref or index scan) is also able to do so
2062             (which test_if_order_by_key () will tell).
2063             Admittedly, range access may be much more efficient than
2064             e.g. index scan, but the only thing that matters when
2065             no_changes==true is the answer to the question: "Is it
2066             possible to avoid sorting if an index is used to access
2067             this table?". The answer does not depend on the outcome of
2068             the range optimizer.
2069           */
2070           key_map new_ref_key_map;  // Force the creation of quick select
2071           new_ref_key_map.set_bit(new_ref_key); // only for new_ref_key.
2072 
          // NOTE(review): this local shadows the function-level trace_wrapper.
2073           Opt_trace_object trace_wrapper(trace);
2074           Opt_trace_object
2075             trace_recest(trace, "rows_estimation");
2076           trace_recest.add_utf8_table(tab->table_ref).
2077           add_utf8("index", table->key_info[new_ref_key].name);
2078           QUICK_SELECT_I *qck;
          // A result <= 0 from test_quick_select() is treated as "no quick".
2079           const bool no_quick=
2080             test_quick_select(thd, new_ref_key_map,
2081                               0,       // empty table_map
2082                               join->calc_found_rows ?
2083                                 HA_POS_ERROR :
2084                                 join->unit->select_limit_cnt,
2085                               false,   // don't force quick range
2086                               order->direction, tab,
2087                               // we are after make_join_select():
2088                               tab->condition(), &tab->needed_reg, &qck,
2089                               tab->table()->force_index) <= 0;
2090           assert(tab->quick() == save_quick);
2091           tab->set_quick(qck);
2092           if (no_quick)
2093           {
2094             can_skip_sorting= false;
2095             goto fix_ICP;
2096           }
2097 	}
2098         ref_key= new_ref_key;
2099         changed_key= new_ref_key;
2100       }
2101     }
2102     /* Check if we get the rows in requested sorted order by using the key */
2103     if (usable_keys.is_set(ref_key) &&
2104         (order_direction= test_if_order_by_key(order,table,ref_key,
2105 					       &used_key_parts)))
2106       goto check_reverse_order;
2107   }
2108   {
2109     /*
2110       There is no ref/index scan/range scan access set up for this
2111       table, or it does not provide the requested ordering. Do a
2112       cost-based search on all keys.

      The braces confine the initialized locals below to this block, so
      that the 'goto check_reverse_order' above does not jump past their
      initialization.
2113     */
2114     uint best_key_parts= 0;
2115     uint saved_best_key_parts= 0;
2116     int best_key_direction= 0;
2117     ha_rows table_records= table->file->stats.records;
2118 
2119     /*
2120       If an index scan that cannot provide ordering has been selected
2121       then do not use the index scan key as starting hint to
2122       test_if_cheaper_ordering()
2123     */
2124     const int ref_key_hint= (order_direction == 0 &&
2125                              tab->type() == JT_INDEX_SCAN) ? -1 : ref_key;
2126 
2127     /*
2128       Does the query have a "FORCE INDEX [FOR GROUP BY] (idx)" (if
2129       clause is group by) or a "FORCE INDEX [FOR ORDER BY] (idx)" (if
2130       clause is order by)?

      NOTE(review): 'join' has already been dereferenced above (join->thd),
      so the 'join &&' test below looks redundant.
2131     */
2132     const bool is_group_by= join && join->grouped && order == join->group_list;
2133     const bool is_force_index= table->force_index ||
2134       (is_group_by ? table->force_index_group : table->force_index_order);
2135 
2136     /*
2137       Find an ordering index alternative over the chosen plan iff
2138       prefer_ordering_index switch is on. This switch is overridden only when
2139       force index for order/group is specified.

      Note that select_limit is passed both as input and as an output
      argument, so it may be modified by the call.
2140     */
2141     if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_PREFER_ORDERING_INDEX) ||
2142         is_force_index)
2143       test_if_cheaper_ordering(tab, order, table, usable_keys,
2144                                ref_key_hint,
2145                                select_limit,
2146                                &best_key, &best_key_direction,
2147                                &select_limit, &best_key_parts,
2148                                &saved_best_key_parts);
2149 
2150     if (best_key < 0)
2151     {
2152       // No usable key has been found
2153       can_skip_sorting= false;
2154       goto fix_ICP;
2155     }
2156 
2157     /*
2158       filesort() and join cache are usually faster than reading in
2159       index order and not using join cache. Don't use index scan
2160       unless:
2161        - the user specified FORCE INDEX [FOR {GROUP|ORDER} BY] (have to assume
2162          the user knows what's best)
2163        - the chosen index is clustered primary key (table scan is not cheaper)
2164     */
2165     if (!is_force_index &&
2166         (select_limit >= table_records) &&
2167         (tab->type() == JT_ALL &&
2168          join->primary_tables > join->const_tables + 1) &&
2169          ((unsigned) best_key != table->s->primary_key ||
2170           !table->file->primary_key_is_clustered()))
2171     {
2172       can_skip_sorting= false;
2173       goto fix_ICP;
2174     }
2175 
    /*
      Run the range optimizer for best_key only, and at most once per key
      (tracked in tab->quick_order_tested).
    */
2176     if (table->quick_keys.is_set(best_key) &&
2177         !tab->quick_order_tested.is_set(best_key) &&
2178         best_key != ref_key)
2179     {
2180       tab->quick_order_tested.set_bit(best_key);
      // NOTE(review): this local shadows the function-level trace_wrapper.
2181       Opt_trace_object trace_wrapper(trace);
2182       Opt_trace_object
2183         trace_recest(trace, "rows_estimation");
2184       trace_recest.add_utf8_table(tab->table_ref).
2185         add_utf8("index", table->key_info[best_key].name);
2186 
2187       key_map keys_to_use;           // Force the creation of quick select
2188       keys_to_use.set_bit(best_key); // only best_key.
2189       QUICK_SELECT_I *qck;
      // The return value is not examined here; only qck is used.
2190       test_quick_select(thd,
2191                         keys_to_use,
2192                         0,        // empty table_map
2193                         join->calc_found_rows ?
2194                         HA_POS_ERROR :
2195                         join->unit->select_limit_cnt,
2196                         true,     // force quick range
2197                         order->direction, tab, tab->condition(),
2198                         &tab->needed_reg, &qck, tab->table()->force_index);
2199       /*
2200         If tab->quick() pointed to another quick than save_quick, we would
2201         lose access to it and leak memory.
2202       */
2203       assert(tab->quick() == save_quick || tab->quick() == NULL);
2204       tab->set_quick(qck);
2205     }
2206     order_direction= best_key_direction;
2207     /*
2208       saved_best_key_parts is actual number of used keyparts found by the
2209       test_if_order_by_key function. It could differ from keyinfo->key_parts,
2210       thus we have to restore it in case of desc order as it affects
2211       QUICK_SELECT_DESC behaviour.
2212     */
2213     used_key_parts= (order_direction == -1) ?
2214       saved_best_key_parts :  best_key_parts;
2215     changed_key= best_key;
2216     // We will use index scan or range scan:
2217     set_up_ref_access_to_key= false;
2218   }
2219 
2220 check_reverse_order:
2221   assert(order_direction != 0);
2222 
2223   if (order_direction == -1)		// If ORDER BY ... DESC
2224   {
2225     if (tab->quick())
2226     {
2227       /*
2228 	Don't reverse the sort order, if it's already done.
2229         (In some cases test_if_order_by_key() can be called multiple times)
2230       */
2231       if (tab->quick()->reverse_sorted())
2232       {
2233         can_skip_sorting= true;
2234         goto fix_ICP;
2235       }
2236 
2237       if (tab->quick()->reverse_sort_possible())
2238         can_skip_sorting= true;
2239       else
2240       {
2241         can_skip_sorting= false;
2242         goto fix_ICP;
2243       }
2244     }
2245     else
2246     {
2247       // Other index access (ref or scan) poses no problem
2248       can_skip_sorting= true;
2249     }
2250   }
2251   else
2252   {
2253     // ORDER BY ASC poses no problem
2254     can_skip_sorting= true;
2255   }
2256 
2257   assert(can_skip_sorting);
2258 
2259   /*
2260     Update query plan with access pattern for doing
2261     ordered access according to what we have decided
2262     above.
2263   */
2264   if (!no_changes) // We are allowed to update QEP
2265   {
2266     if (set_up_ref_access_to_key)
2267     {
2268       /*
2269         We'll use ref access method on key changed_key. In general case
2270         the index search tuple for changed_ref_key will be different (e.g.
2271         when one index is defined as (part1, part2, ...) and another as
2272         (part1, part2(N), ...) and the WHERE clause contains
2273         "part1 = const1 AND part2=const2".
2274         So we build tab->ref() from scratch here.
2275       */
2276       Key_use *keyuse= tab->keyuse();
2277       while (keyuse->key != (uint)changed_key &&
2278              keyuse->table_ref == tab->table_ref)
2279         keyuse++;
2280 
      // A non-zero result from create_ref_for_key() means we fall back to sorting.
2281       if (create_ref_for_key(join, tab, keyuse, tab->prefix_tables()))
2282       {
2283         can_skip_sorting= false;
2284         goto fix_ICP;
2285       }
2286 
2287       assert(tab->type() != JT_REF_OR_NULL && tab->type() != JT_FT);
2288 
2289       // Changing the key makes filter_effect obsolete
2290       tab->position()->filter_effect= COND_FILTER_STALE;
2291     }
2292     else if (best_key >= 0)
2293     {
2294       /*
2295         If ref_key used index tree reading only ('Using index' in EXPLAIN),
2296         and best_key doesn't, then revert the decision.
2297       */
2298       if(!table->covering_keys.is_set(best_key))
2299         table->set_keyread(false);
2300       if (!tab->quick() || tab->quick() == save_quick) // created no QUICK
2301       {
2302         // Avoid memory leak:
2303         assert(tab->quick() == save_quick || tab->quick() == NULL);
2304         tab->set_quick(NULL);
2305         tab->set_index(best_key);
2306         tab->set_type(JT_INDEX_SCAN);       // Read with index_first(), index_next()
2307         /*
2308           There is a bug. When we change here, e.g. from group_min_max to
2309           index scan: loose index scan expected to read a small number of rows
2310           (jumping through the index), this small number was in
2311           position()->rows_fetched; index scan will read much more, so
2312           rows_fetched should be updated. So should the filtering effect.
2313           It is visible in main.distinct in trunk:
2314           explain SELECT distinct a from t3 order by a desc limit 2;
2315           id	select_type	table	partitions	type	possible_keys	key	key_len	ref	rows	filtered	Extra
2316           1	SIMPLE	t3	NULL	index	a	a	5	NULL	40	25.00	Using index
2317           "rows=40" should be ~200 i.e. # of records in table. Filter should be
2318           100.00 (no WHERE).
2319         */
2320         table->file->ha_index_or_rnd_end();
2321         if (thd->lex->is_explain())
2322         {
2323           /*
2324             @todo this neutralizes add_ref_to_table_cond(); as a result
2325             EXPLAIN shows no "using where" though real SELECT has one.
2326           */
2327           tab->ref().key= -1;
2328           tab->ref().key_parts= 0;
2329         }
2330         tab->position()->filter_effect= COND_FILTER_STALE;
2331       }
2332       else if (tab->type() != JT_ALL)
2333       {
2334         /*
2335           We're about to use a quick access to the table.
2336           We need to change the access method so as the quick access
2337           method is actually used.
2338         */
2339         assert(tab->quick());
2340         assert(tab->quick()->index==(uint)best_key);
2341         tab->set_type(calc_join_type(tab->quick()->get_type()));
2342         tab->use_quick=QS_RANGE;
2343         tab->ref().key= -1;
2344         tab->ref().key_parts=0;		// Don't use ref key.
2345         if (tab->quick()->is_loose_index_scan())
2346           join->tmp_table_param.precomputed_group_by= TRUE;
2347         tab->position()->filter_effect= COND_FILTER_STALE;
2348       }
2349     } // best_key >= 0
2350 
2351     if (order_direction == -1)		// If ORDER BY ... DESC
2352     {
2353       if (tab->quick())
2354       {
2355         /* ORDER BY range_key DESC */
2356         QUICK_SELECT_I *tmp= tab->quick()->make_reverse(used_key_parts);
2357         if (!tmp)
2358         {
2359           /* purecov: begin inspected */
2360           can_skip_sorting= false;      // Reverse sort failed -> filesort
2361           goto fix_ICP;
2362           /* purecov: end */
2363         }
        // Delete the replaced quick only if it is neither tmp nor the
        // original save_quick (save_quick is handled at fix_ICP).
2364         if (tab->quick() != tmp && tab->quick() != save_quick)
2365           delete tab->quick();
2366         tab->set_quick(tmp);
2367         tab->set_type(calc_join_type(tmp->get_type()));
2368         tab->position()->filter_effect= COND_FILTER_STALE;
2369       }
2370       else if (tab->type() == JT_REF &&
2371                tab->ref().key_parts <= used_key_parts)
2372       {
2373         /*
2374           SELECT * FROM t1 WHERE a=1 ORDER BY a DESC,b DESC
2375 
2376           Use a traversal function that starts by reading the last row
2377           with key part (A) and then traverse the index backwards.
2378         */
2379         tab->reversed_access= true;
2380 
2381         /*
2382           The current implementation of join_read_prev_same() does not
2383           work well in combination with ICP and can lead to increased
2384           execution time. Setting changed_key to the current key
2385           (based on that we change the access order for the key) will
2386           ensure that a pushed index condition will be cancelled.
2387         */
2388         changed_key= tab->ref().key;
2389       }
2390       else if (tab->type() == JT_INDEX_SCAN)
2391         tab->reversed_access= true;
2392     }
2393     else if (tab->quick())
2394       tab->quick()->need_sorted_output();
2395 
2396   } // QEP has been modified
2397 
2398 fix_ICP:
2399   /*
2400     Cleanup:
2401     We may have both a 'tab->quick()' and 'save_quick' (original)
2402     at this point. Delete the one that we won't use.
2403   */
2404   if (can_skip_sorting && !no_changes)
2405   {
    // With an index scan and a limit below the table's record count, the
    // limit is recorded as the number of rows fetched.
2406     if (tab->type() == JT_INDEX_SCAN &&
2407         select_limit < table->file->stats.records)
2408     {
2409       tab->position()->rows_fetched= select_limit;
2410       tab->position()->filter_effect= COND_FILTER_STALE_NO_CONST;
2411     }
2412 
2413     // Keep current (ordered) tab->quick()
2414     if (save_quick != tab->quick())
2415       delete save_quick;
2416   }
2417   else
2418   {
2419     // Restore original save_quick
2420     if (tab->quick() != save_quick)
2421     {
2422       delete tab->quick();
2423       tab->set_quick(save_quick);
2424     }
2425   }
2426 
  // Close the "steps" array, then write the summary object to the trace.
2427   trace_steps.end();
2428   Opt_trace_object
2429     trace_change_index(trace, "index_order_summary");
2430   trace_change_index.add_utf8_table(tab->table_ref)
2431     .add("index_provides_order", can_skip_sorting)
2432     .add_alnum("order_direction", order_direction == 1 ? "asc" :
2433                ((order_direction == -1) ? "desc" :
2434                 "undefined"));
2435 
2436   if (changed_key >= 0)
2437   {
2438     // switching to another index
2439     // Should be no pushed conditions at this point
2440     assert(!table->file->pushed_idx_cond);
2441     if (unlikely(trace->is_started()))
2442     {
2443       trace_change_index.add_utf8("index", table->key_info[changed_key].name);
2444       trace_change_index.add("plan_changed", !no_changes);
2445       if (!no_changes)
2446         trace_change_index.add_alnum("access_type", join_type_str[tab->type()]);
2447     }
2448   }
2449   else if (unlikely(trace->is_started()))
2450   {
    // The index is kept; report it, or "unknown" if there is none.
2451     trace_change_index.add_utf8("index",
2452                                 ref_key >= 0 ?
2453                                 table->key_info[ref_key].name : "unknown");
2454     trace_change_index.add("plan_changed", false);
2455   }
2456   DBUG_RETURN(can_skip_sorting);
2457 }
2458 
2459 
2460 /**
2461   Prune partitions for all tables of a join (query block).
2462 
2463   Requires that tables have been locked.
2464 
2465   @returns false if success, true if error
2466 */
2467 
prune_table_partitions()2468 bool JOIN::prune_table_partitions()
2469 {
2470   assert(select_lex->partitioned_table_count);
2471 
2472   for (TABLE_LIST *tbl= select_lex->leaf_tables; tbl; tbl= tbl->next_leaf)
2473   {
2474     /*
2475       If tbl->embedding!=NULL that means that this table is in the inner
2476       part of the nested outer join, and we can't do partition pruning
2477       (TODO: check if this limitation can be lifted.
2478              This also excludes semi-joins.  Is that intentional?)
2479       This will try to prune non-static conditions, which can
2480       be used after the tables are locked.
2481     */
2482     if (!tbl->embedding)
2483     {
2484       Item *prune_cond= tbl->join_cond_optim() ?
2485                         tbl->join_cond_optim() : where_cond;
2486       if (prune_partitions(thd, tbl->table, prune_cond))
2487         return true;
2488     }
2489   }
2490 
2491   return false;
2492 }
2493 
2494 
2495 /**
2496   A helper function to check whether it's better to use range than ref.
2497 
2498   @details
2499   Heuristic: Switch from 'ref' to 'range' access if 'range'
2500   access can utilize more keyparts than 'ref' access. Conditions
2501   for doing switching:
2502 
2503   1) Range access is possible Or tab->dodgy_ref_cost is set.
2504   2) This function is not relevant for FT, since there is no range access for
2505      that type of index.
2506   3) Used parts of key shouldn't have nullable parts & ref_or_null isn't used.
2507   4) 'ref' access depends on a constant, not a value read from a
2508      table earlier in the join sequence.
2509 
2510      Rationale: if 'ref' depends on a value from another table,
2511      the join condition is not used to limit the rows read by
2512      'range' access (that would require dynamic range - 'Range
2513      checked for each record'). In other words, if 'ref' depends
2514      on a value from another table, we have a query with
2515      conditions of the form
2516 
2517       this_table.idx_col1 = other_table.col AND   <<- used by 'ref'
2518       this_table.idx_col1 OP <const> AND          <<- used by 'range'
2519       this_table.idx_col2 OP <const> AND ...      <<- used by 'range'
2520 
2521      and an index on (idx_col1,idx_col2,...). But the fact that
2522      'range' access uses more keyparts does not mean that it is
2523      more selective than 'ref' access because these access types
2524      utilize different parts of the query condition. We
2525      therefore trust the cost based choice made by
2526      best_access_path() instead of forcing a heuristic choice
2527      here.
2528      5a) 'ref' access and 'range' access uses the same index.
2529      5b) 'range' access uses more keyparts than 'ref' access.
2530 
2531      OR
2532 
2533      6) Ref has borrowed the index estimate from range and created a cost
2534         estimate (See Optimize_table_order::find_best_ref). This will be a
2535         problem if range built it's row estimate using a larger number of key
2536         parts than ref. In such a case, shift to range access over the same
2537         index. So run the range optimizer with that index as the only choice.
2538         (Condition 5 is not relevant here since it has been tested in
2539         find_best_ref.)
2540 
2541   @param thd THD      To re-run range optimizer.
2542   @param tab JOIN_TAB To check the above conditions.
2543 
2544   @return true   Range is better than ref
2545   @return false  Ref is better or switch isn't possible
2546 
2547   @todo: This decision should rather be made in best_access_path()
2548 */
2549 
can_switch_from_ref_to_range(THD * thd,JOIN_TAB * tab)2550 static bool can_switch_from_ref_to_range(THD *thd, JOIN_TAB *tab)
2551 {
2552   if ((tab->quick() || tab->dodgy_ref_cost) &&               // 1)
2553       tab->position()->key->keypart != FT_KEYPART)           // 2)
2554   {
2555     uint keyparts= 0, length= 0;
2556     table_map dep_map= 0;
2557     bool maybe_null= false;
2558 
2559     calc_length_and_keyparts(tab->position()->key, tab,
2560                              tab->position()->key->key,
2561                              tab->prefix_tables(), NULL, &length, &keyparts,
2562                              &dep_map, &maybe_null);
2563     if (maybe_null ||                                        // 3)
2564         dep_map)                                             // 4)
2565       return false;
2566 
2567     if (tab->quick() &&
2568         tab->position()->key->key == tab->quick()->index)    // 5a)
2569       return length < tab->quick()->max_used_key_length;     // 5b)
2570     else if (tab->dodgy_ref_cost)                            // 6)
2571     {
2572       key_map new_ref_key_map;
2573       new_ref_key_map.set_bit(tab->position()->key->key);
2574 
2575       Opt_trace_context * const trace= &thd->opt_trace;
2576       Opt_trace_object trace_wrapper(trace);
2577 
2578       Opt_trace_object
2579         can_switch(trace, "check_if_range_uses_more_keyparts_than_ref");
2580       Opt_trace_object
2581         trace_setup_cond(trace, "rerunning_range_optimizer_for_single_index");
2582 
2583       QUICK_SELECT_I *qck;
2584       if (test_quick_select(thd, new_ref_key_map,
2585                             0,       // empty table_map
2586                             tab->join()->row_limit,
2587                             false,   // don't force quick range
2588                             ORDER::ORDER_NOT_RELEVANT,
2589                             tab,
2590                             tab->join_cond() ? tab->join_cond() :
2591                             tab->join()->where_cond,
2592                             &tab->needed_reg,
2593                             &qck, true) > 0)
2594       {
2595         if (length < qck->max_used_key_length)
2596         {
2597           delete tab->quick();
2598           tab->set_quick(qck);
2599           return true;
2600         }
2601         else
2602         {
2603           Opt_trace_object (trace, "access_type_unchanged").
2604             add("ref_key_length", length).
2605             add("range_key_length", qck->max_used_key_length);
2606           delete qck;
2607         }
2608       }
2609     }
2610   }
2611   return false;
2612 }
2613 
2614 /**
2615  An utility function - apply heuristics and optimize access methods to tables.
2616  Currently this function can change REF to RANGE and ALL to INDEX scan if
2617  latter is considered to be better (not cost-based) than the former.
2618  @note Side effect - this function could set 'Impossible WHERE' zero
2619  result.
2620 */
2621 
adjust_access_methods()2622 void JOIN::adjust_access_methods()
2623 {
2624   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
2625   for (uint i= const_tables; i < tables; i++)
2626   {
2627     JOIN_TAB *const tab= best_ref[i];
2628     TABLE_LIST *const tl= tab->table_ref;
2629 
2630     if (tab->type() == JT_ALL)
2631     {
2632       /*
2633        It's possible to speedup query by switching from full table scan to
2634        the scan of covering index, due to less data being read.
2635        Prerequisites for this are:
2636        1) Keyread (i.e index only scan) is allowed (table isn't updated/deleted
2637          from)
2638        2) Covering indexes are available
2639        3) This isn't a derived table/materialized view
2640       */
2641       if (!tab->table()->no_keyread &&                                //  1
2642           !tab->table()->covering_keys.is_clear_all() &&              //  2
2643           !tl->uses_materialization())                                //  3
2644       {
2645         /*
2646         It has turned out that the change commented out below, while speeding
2647         things up for disk-bound loads, slows them down for cases when the data
2648         is in disk cache (see BUG#35850):
2649         //  See bug #26447: "Using the clustered index for a table scan
2650         //  is always faster than using a secondary index".
2651         if (table->s->primary_key != MAX_KEY &&
2652             table->file->primary_key_is_clustered())
2653           tab->index= table->s->primary_key;
2654         else
2655           tab->index=find_shortest_key(table, & table->covering_keys);
2656         */
2657         if (tab->position()->sj_strategy != SJ_OPT_LOOSE_SCAN)
2658           tab->set_index(find_shortest_key(tab->table(), &tab->table()->covering_keys));
2659         tab->set_type(JT_INDEX_SCAN);      // Read with index_first / index_next
2660         // From table scan to index scan, thus filter effect needs no recalc.
2661       }
2662     }
2663     else if (tab->type() == JT_REF)
2664     {
2665       if (can_switch_from_ref_to_range(thd, tab))
2666       {
2667         tab->set_type(JT_RANGE);
2668 
2669         Opt_trace_context * const trace= &thd->opt_trace;
2670         Opt_trace_object wrapper(trace);
2671         Opt_trace_object (trace, "access_type_changed").
2672           add_utf8_table(tl).
2673           add_utf8("index",
2674                    tab->table()->key_info[tab->position()->key->key].name).
2675           add_alnum("old_type", "ref").
2676           add_alnum("new_type", join_type_str[tab->type()]).
2677           add_alnum("cause", "uses_more_keyparts");
2678 
2679         tab->use_quick= QS_RANGE;
2680         tab->position()->filter_effect= COND_FILTER_STALE;
2681       }
2682       else
2683       {
2684         // Cleanup quick, REF/REF_OR_NULL/EQ_REF, will be clarified later
2685         delete tab->quick();
2686         tab->set_quick(NULL);
2687       }
2688     }
2689     // Ensure AM consistency
2690     assert(!(tab->quick() && (tab->type() == JT_REF || tab->type() == JT_ALL)));
2691     assert((tab->type() != JT_RANGE && tab->type() != JT_INDEX_MERGE) ||
2692            tab->quick());
2693     if (!tab->const_keys.is_clear_all() &&
2694         tab->table()->reginfo.impossible_range &&
2695         ((i == const_tables && tab->type() == JT_REF) ||
2696          ((tab->type() == JT_ALL || tab->type() == JT_RANGE ||
2697            tab->type() == JT_INDEX_MERGE || tab->type() == JT_INDEX_SCAN) &&
2698            tab->use_quick != QS_RANGE)) &&
2699         !tab->table_ref->is_inner_table_of_outer_join())
2700       zero_result_cause=
2701         "Impossible WHERE noticed after reading const tables";
2702   }
2703 }
2704 
2705 
alloc_jtab_array(THD * thd,uint table_count)2706 static JOIN_TAB *alloc_jtab_array(THD *thd, uint table_count)
2707 {
2708   JOIN_TAB *t= new (thd->mem_root) JOIN_TAB[table_count];
2709   if (!t)
2710     return NULL;                                /* purecov: inspected */
2711 
2712   QEP_shared *qs= new (thd->mem_root) QEP_shared[table_count];
2713   if (!qs)
2714     return NULL;                                /* purecov: inspected */
2715 
2716   for (uint i= 0; i < table_count; ++i)
2717     t[i].set_qs(qs++);
2718 
2719   return t;
2720 }
2721 
2722 
2723 /**
2724   Set up JOIN_TAB structs according to the picked join order in best_positions.
2725   This allocates execution structures so may be called only after we have the
2726   very final plan. It must be called after
2727   Optimize_table_order::fix_semijoin_strategies().
2728 
2729   @return False if success, True if error
2730 
2731   @details
2732     - create join->join_tab array and copy from existing JOIN_TABs in join order
2733     - create helper structs for materialized semi-join handling
2734     - finalize semi-join strategy choices
2735     - Number of intermediate tables "tmp_tables" is calculated.
2736     - "tables" and "primary_tables" are recalculated.
2737     - for full and index scans info of estimated # of records is updated.
2738     - in a helper function:
2739       - all heuristics are applied and the final access method type is picked
2740         for each join_tab (only test_if_skip_sortorder() could override it)
2741       - AM consistency is ensured (e.g only range and index merge are allowed
2742         to have quick select set).
2743       - if "Impossible WHERE" is detected - appropriate zero_result_cause is
2744         set.
2745 
2746    Notice that intermediate tables will not have a POSITION reference; and they
2747    will not have a TABLE reference before the final stages of code generation.
2748 
2749    @todo the block which sets tab->type should move to adjust_access_methods
2750    for unification.
2751 */
2752 
get_best_combination()2753 bool JOIN::get_best_combination()
2754 {
2755   DBUG_ENTER("JOIN::get_best_combination");
2756 
2757   // At this point "tables" and "primary"tables" represent the same:
2758   assert(tables == primary_tables);
2759 
2760   /*
2761     Allocate additional space for tmp tables.
2762     Number of plan nodes:
2763       # of regular input tables (including semi-joined ones) +
2764       # of semi-join nests for materialization +
2765       1? + // For GROUP BY
2766       1? + // For DISTINCT
2767       1? + // For aggregation functions aggregated in outer query
2768            // when used with distinct
2769       1? + // For ORDER BY
2770       1?   // buffer result
2771     Up to 2 tmp tables are actually used, but it's hard to tell exact number
2772     at this stage.
2773   */
2774   uint num_tmp_tables= (group_list ? 1 : 0) +
2775                        (select_distinct ?
2776                         (tmp_table_param.outer_sum_func_count ? 2 : 1) : 0) +
2777                        (order ? 1 : 0) +
2778                        (select_lex->active_options() &
2779                         (SELECT_BIG_RESULT | OPTION_BUFFER_RESULT) ? 1 : 0);
2780   if (num_tmp_tables > 2)
2781     num_tmp_tables= 2;
2782 
2783   /*
2784     Rearrange queries with materialized semi-join nests so that the semi-join
2785     nest is replaced with a reference to a materialized temporary table and all
2786     materialized subquery tables are placed after the intermediate tables.
2787     After the following loop, "inner_target" is the position of the first
2788     subquery table (if any). "outer_target" is the position of first outer
2789     table, and will later be used to track the position of any materialized
2790     temporary tables.
2791   */
2792   const bool has_semijoin= !select_lex->sj_nests.is_empty();
2793   uint outer_target= 0;
2794   uint inner_target= primary_tables + num_tmp_tables;
2795   uint sjm_nests= 0;
2796 
2797   if (has_semijoin)
2798   {
2799     for (uint tableno= 0; tableno < primary_tables; )
2800     {
2801       if (sj_is_materialize_strategy(best_positions[tableno].sj_strategy))
2802       {
2803         sjm_nests++;
2804         inner_target-= (best_positions[tableno].n_sj_tables - 1);
2805         tableno+= best_positions[tableno].n_sj_tables;
2806       }
2807       else
2808         tableno++;
2809     }
2810   }
2811 
2812   JOIN_TAB *tmp_join_tabs= NULL;
2813   if (sjm_nests + num_tmp_tables)
2814   {
2815     // join_tab array only has "primary_tables" tables. We need those more:
2816     if (!(tmp_join_tabs= alloc_jtab_array(thd, sjm_nests + num_tmp_tables)))
2817       DBUG_RETURN(true);                        /* purecov: inspected */
2818   }
2819 
2820   // To check that we fill the array correctly: fill it with zeros first
2821   memset(best_ref, 0, sizeof(JOIN_TAB*) * (primary_tables + sjm_nests +
2822                                            num_tmp_tables));
2823 
2824   int sjm_index= tables;  // Number assigned to materialized temporary table
2825   int remaining_sjm_inner= 0;
2826   bool err= false;
2827   for (uint tableno= 0; tableno < tables; tableno++)
2828   {
2829     POSITION *const pos= best_positions + tableno;
2830     if (has_semijoin && sj_is_materialize_strategy(pos->sj_strategy))
2831     {
2832       assert(outer_target < inner_target);
2833 
2834       TABLE_LIST *const sj_nest= pos->table->emb_sj_nest;
2835 
2836       // Handle this many inner tables of materialized semi-join
2837       remaining_sjm_inner= pos->n_sj_tables;
2838 
2839       /*
2840         If we fail in some allocation below, we cannot bail out immediately;
2841         that would put us in a difficult situation to clean up; imagine we
2842         have planned this layout:
2843           outer1 - sj_mat_tmp1 - outer2 - sj_mat_tmp2 - outer3
2844         We have successfully filled a JOIN_TAB for sj_mat_tmp1, and are
2845         failing to fill a JOIN_TAB for sj_mat_tmp2 (OOM). So we want to quit
2846         this function, which will lead to cleanup functions.
2847         But sj_mat_tmp1 is in this->best_ref only, outer3 is in this->join_tab
2848         only: what is the array to traverse for cleaning up? What is the
2849         number of tables to loop over?
2850         So: if we fail in the present loop, we record the error but continue
2851         filling best_ref; when it's fully filled, bail out, because then
2852         best_ref can be used as reliable array for cleaning up.
2853       */
2854       JOIN_TAB *const tab= tmp_join_tabs++;
2855       best_ref[outer_target]= tab;
2856       tab->set_join(this);
2857       tab->set_idx(outer_target);
2858 
2859       /*
2860         Up to this point there cannot be a failure. JOIN_TAB has been filled
2861         enough to be clean-able.
2862       */
2863 
2864       Semijoin_mat_exec *const sjm_exec=
2865         new (thd->mem_root)
2866         Semijoin_mat_exec(sj_nest,
2867                           (pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN),
2868                           remaining_sjm_inner, outer_target, inner_target);
2869 
2870       tab->set_sj_mat_exec(sjm_exec);
2871 
2872       if (!sjm_exec ||
2873           setup_semijoin_materialized_table(tab, sjm_index,
2874                                             pos, best_positions + sjm_index))
2875         err= true;                              /* purecov: inspected */
2876 
2877       outer_target++;
2878       sjm_index++;
2879     }
2880     /*
2881       Locate join_tab target for the table we are considering.
2882       (remaining_sjm_inner becomes negative for non-SJM tables, this can be
2883        safely ignored).
2884     */
2885     const uint target=
2886       (remaining_sjm_inner--) > 0 ? inner_target++ : outer_target++;
2887     JOIN_TAB *const tab= pos->table;
2888 
2889     best_ref[target]= tab;
2890     tab->set_idx(target);
2891     tab->set_position(pos);
2892     TABLE *const table= tab->table();
2893     if (tab->type() != JT_CONST && tab->type() != JT_SYSTEM)
2894     {
2895       if (pos->sj_strategy == SJ_OPT_LOOSE_SCAN && tab->quick() &&
2896           tab->quick()->index != pos->loosescan_key)
2897       {
2898         /*
2899           We must use the duplicate-eliminating index, so this QUICK is not
2900           an option.
2901         */
2902         delete tab->quick();
2903         tab->set_quick(NULL);
2904       }
2905       if (!pos->key)
2906       {
2907         if (tab->quick())
2908           tab->set_type(calc_join_type(tab->quick()->get_type()));
2909         else
2910           tab->set_type(JT_ALL);
2911       }
2912       else
2913         // REF or RANGE, clarify later when prefix tables are set for JOIN_TABs
2914         tab->set_type(JT_REF);
2915     }
2916     assert(tab->type() != JT_UNKNOWN);
2917 
2918     assert(table->reginfo.join_tab == tab);
2919     if (!tab->join_cond())
2920       table->reginfo.not_exists_optimize= false;     // Only with LEFT JOIN
2921     map2table[tab->table_ref->tableno()]= tab;
2922   }
2923 
2924   // Count the materialized semi-join tables as regular input tables
2925   tables+= sjm_nests + num_tmp_tables;
2926   // Set the number of non-materialized tables:
2927   primary_tables= outer_target;
2928 
2929   /*
2930     Between the last outer table or sj-mat tmp table, and the first sj-mat
2931     inner table, there may be 2 slots for sort/group/etc tmp tables:
2932   */
2933   for (uint i= 0; i < num_tmp_tables; ++i)
2934   {
2935     const uint idx= outer_target + i;
2936     tmp_join_tabs->set_join(this);
2937     tmp_join_tabs->set_idx(idx);
2938     assert(best_ref[idx] == NULL); // verify that not overwriting
2939     best_ref[idx]= tmp_join_tabs++;
2940     /*
2941       note that set_table() cannot be called yet. We may not even use this
2942       JOIN_TAB in the end, it's dummy at the moment. Which can be tested with
2943       "position()!=NULL".
2944     */
2945   }
2946 
2947   // make array unreachable: should walk JOIN_TABs by best_ref now
2948   join_tab= NULL;
2949 
2950   if (err)
2951     DBUG_RETURN(true);                          /* purecov: inspected */
2952 
2953   if (has_semijoin)
2954   {
2955     set_semijoin_info();
2956 
2957     // Update equalities and keyuses after having added SJ materialization
2958     if (update_equalities_for_sjm())
2959       DBUG_RETURN(true);
2960   }
2961   if (!plan_is_const())
2962   {
2963     // Assign map of "available" tables to all tables belonging to query block
2964     set_prefix_tables();
2965     adjust_access_methods();
2966   }
2967   // Calculate outer join info
2968   if (select_lex->outer_join)
2969     make_outerjoin_info();
2970 
2971   // sjm is no longer needed, trash it. To reuse it, reset its members!
2972   List_iterator<TABLE_LIST> sj_list_it(select_lex->sj_nests);
2973   TABLE_LIST *sj_nest;
2974   while ((sj_nest= sj_list_it++))
2975     TRASH(&sj_nest->nested_join->sjm, sizeof(sj_nest->nested_join->sjm));
2976 
2977   DBUG_RETURN(false);
2978 }
2979 
2980 
2981 /*
2982   Revise usage of join buffer for the specified table and the whole nest
2983 
2984   SYNOPSIS
2985     revise_cache_usage()
2986       tab    join table for which join buffer usage is to be revised
2987 
2988   DESCRIPTION
2989     The function revise the decision to use a join buffer for the table 'tab'.
2990     If this table happened to be among the inner tables of a nested outer join/
2991     semi-join the functions denies usage of join buffers for all of them
2992 
2993   RETURN
2994     none
2995 */
2996 
2997 static
revise_cache_usage(JOIN_TAB * join_tab)2998 void revise_cache_usage(JOIN_TAB *join_tab)
2999 {
3000   plan_idx first_inner= join_tab->first_inner();
3001   JOIN *const join= join_tab->join();
3002   if (first_inner != NO_PLAN_IDX)
3003   {
3004     plan_idx end_tab= join_tab->idx();
3005     for (first_inner= join_tab->first_inner();
3006          first_inner != NO_PLAN_IDX;
3007          first_inner= join->best_ref[first_inner]->first_upper())
3008     {
3009       for (plan_idx i= end_tab-1; i >= first_inner; --i)
3010         join->best_ref[i]->set_use_join_cache(JOIN_CACHE::ALG_NONE);
3011       end_tab= first_inner;
3012     }
3013   }
3014   else if (join_tab->get_sj_strategy() == SJ_OPT_FIRST_MATCH)
3015   {
3016     plan_idx first_sj_inner= join_tab->first_sj_inner();
3017     for (plan_idx i= join_tab->idx()-1; i >= first_sj_inner; --i)
3018     {
3019       JOIN_TAB *tab= join->best_ref[i];
3020       if (tab->first_sj_inner() == first_sj_inner)
3021         tab->set_use_join_cache(JOIN_CACHE::ALG_NONE);
3022     }
3023   }
3024   else
3025     join_tab->set_use_join_cache(JOIN_CACHE::ALG_NONE);
3026   assert(join->qep_tab == NULL);
3027 }
3028 
3029 
3030 /**
3031   Set up join buffering for a specified table, if possible.
3032 
3033   @param tab             joined table to check join buffer usage for
3034   @param join            join for which the check is performed
3035   @param no_jbuf_after   don't use join buffering after table with this number
3036 
3037   @return false if successful, true if error.
3038           Currently, allocation errors for join cache objects are ignored,
3039           and regular execution is chosen silently.
3040 
3041   @details
3042     The function finds out whether the table 'tab' can be joined using a join
3043     buffer. This check is performed after the best execution plan for 'join'
3044     has been chosen. If the function decides that a join buffer can be employed
3045     then it selects the most appropriate join cache type, which later will
3046     be instantiated by init_join_cache().
3047     If it has already been decided to not use join buffering for this table,
3048     no action is taken.
3049 
3050     Often it is already decided that join buffering will be used earlier in
3051     the optimization process, and this will also ensure that the most correct
3052     cost for the operation is calculated, and hence the probability of
3053     choosing an optimal join plan is higher. However, some join buffering
3054     decisions cannot currently be taken before this stage, hence we need this
3055     function to decide the most accurate join buffering strategy.
3056 
3057     @todo Long-term it is the goal that join buffering strategy is decided
3058     when the plan is selected.
3059 
3060     The result of the check and the type of the join buffer to be used
3061     depend on:
3062       - the access method to access rows of the joined table
3063       - whether the join table is an inner table of an outer join or semi-join
3064       - the optimizer_switch settings for join buffering
3065       - the join 'options'.
3066     In any case join buffer is not used if the number of the joined table is
3067     greater than 'no_jbuf_after'.
3068 
3069     If block_nested_loop is turned on, and if all other criteria for using
3070     join buffering is fulfilled (see below), then join buffer is used
3071     for any join operation (inner join, outer join, semi-join) with 'JT_ALL'
3072     access method.  In that case, a JOIN_CACHE_BNL type is always employed.
3073 
3074     If an index is used to access rows of the joined table and batched_key_access
3075     is on, then a JOIN_CACHE_BKA type is employed. (Unless debug flag,
3076     test_bka unique, is set, then a JOIN_CACHE_BKA_UNIQUE type is employed
3077     instead.)
3078 
3079     If the function decides that a join buffer can be used to join the table
3080     'tab' then it sets @c tab->use_join_cache to reflect the chosen algorithm.
3081 
3082   @note
3083     For a nested outer join/semi-join, currently, we either use join buffers for
3084     all inner tables or for none of them.
3085 
3086   @todo
3087     Support BKA inside SJ-Materialization nests. When doing this, we'll need
3088     to only store sj-inner tables in the join buffer.
3089 #if 0
3090         JOIN_TAB *first_tab= join->join_tab+join->const_tables;
3091         uint n_tables= i-join->const_tables;
3092         / *
3093           We normally put all preceding tables into the join buffer, except
3094           for the constant tables.
3095           If we're inside a semi-join materialization nest, e.g.
3096 
3097              outer_tbl1  outer_tbl2  ( inner_tbl1, inner_tbl2 ) ...
3098                                                        ^-- we're here
3099 
3100           then we need to put into the join buffer only the tables from
3101           within the nest.
3102         * /
3103         if (i >= first_sjm_table && i < last_sjm_table)
3104         {
3105           n_tables= i - first_sjm_table; // will be >0 if we got here
3106           first_tab= join->join_tab + first_sjm_table;
3107         }
3108 #endif
3109 
3110 */
3111 
setup_join_buffering(JOIN_TAB * tab,JOIN * join,uint no_jbuf_after)3112 static bool setup_join_buffering(JOIN_TAB *tab, JOIN *join, uint no_jbuf_after)
3113 {
3114   ASSERT_BEST_REF_IN_JOIN_ORDER(join);
3115   Cost_estimate cost;
3116   ha_rows rows;
3117   uint bufsz= 4096;
3118   uint join_cache_flags = 0;
3119   const bool bnl_on= hint_table_state(join->thd, tab->table_ref->table,
3120                                       BNL_HINT_ENUM, OPTIMIZER_SWITCH_BNL);
3121   const bool bka_on= hint_table_state(join->thd, tab->table_ref->table,
3122                                       BKA_HINT_ENUM, OPTIMIZER_SWITCH_BKA);
3123 
3124   const uint tableno= tab->idx();
3125   const uint tab_sj_strategy= tab->get_sj_strategy();
3126   bool use_bka_unique= false;
3127   DBUG_EXECUTE_IF("test_bka_unique", use_bka_unique= true;);
3128 
3129   /*
3130     If all key_parts are null_rejecting, the MultiRangeRowIterator will
3131     eliminate all NULL values in the key set, such that
3132     HA_MRR_NO_NULL_ENDPOINTS can be promised.
3133   */
3134   const key_part_map keypart_map = make_prev_keypart_map(tab->ref().key_parts);
3135   if (tab->ref().null_rejecting == keypart_map) {
3136     join_cache_flags |= HA_MRR_NO_NULL_ENDPOINTS;
3137   }
3138 
3139   // Set preliminary join cache setting based on decision from greedy search
3140   tab->set_use_join_cache(tab->position()->use_join_buffer ?
3141                           JOIN_CACHE::ALG_BNL : JOIN_CACHE::ALG_NONE);
3142 
3143   if (tableno == join->const_tables)
3144   {
3145     assert(tab->use_join_cache() == JOIN_CACHE::ALG_NONE);
3146     return false;
3147   }
3148 
3149   if (!(bnl_on || bka_on))
3150     goto no_join_cache;
3151 
3152   /*
3153     psergey-todo: why the below when execution code seems to handle the
3154     "range checked for each record" case?
3155   */
3156   if (tab->use_quick == QS_DYNAMIC_RANGE)
3157     goto no_join_cache;
3158 
3159   /* No join buffering if prevented by no_jbuf_after */
3160   if (tableno > no_jbuf_after)
3161     goto no_join_cache;
3162 
3163   /*
3164     An inner table of an outer join nest must not use join buffering if
3165     the first inner table of that outer join nest does not use join buffering.
3166     This condition is not handled by earlier optimizer stages.
3167   */
3168   if (tab->first_inner() != NO_PLAN_IDX &&
3169       tab->first_inner() != tab->idx() &&
3170       !join->best_ref[tab->first_inner()]->use_join_cache())
3171     goto no_join_cache;
3172   /*
3173     The first inner table of an outer join nest must not use join buffering
3174     if the tables in the embedding outer join nest do not use join buffering.
3175     This condition is not handled by earlier optimizer stages.
3176   */
3177   if (tab->first_upper() != NO_PLAN_IDX &&
3178       !join->best_ref[tab->first_upper()]->use_join_cache())
3179     goto no_join_cache;
3180 
3181   switch (tab_sj_strategy)
3182   {
3183   case SJ_OPT_FIRST_MATCH:
3184     /*
3185       Use join cache with FirstMatch semi-join strategy only when semi-join
3186       contains only one table.
3187     */
3188     if (!tab->is_single_inner_of_semi_join())
3189     {
3190       assert(tab->use_join_cache() == JOIN_CACHE::ALG_NONE);
3191       goto no_join_cache;
3192     }
3193     break;
3194 
3195   case SJ_OPT_LOOSE_SCAN:
3196     /* No join buffering if this semijoin nest is handled by loosescan */
3197     assert(tab->use_join_cache() == JOIN_CACHE::ALG_NONE);
3198     goto no_join_cache;
3199 
3200   case SJ_OPT_MATERIALIZE_LOOKUP:
3201   case SJ_OPT_MATERIALIZE_SCAN:
3202     /*
3203       The Materialize strategies reuse the join_tab belonging to the
3204       first table that was materialized. Neither table can use join buffering:
3205       - The first table in a join never uses join buffering.
3206       - The join_tab used for looking up a row in the materialized table, or
3207         scanning the rows of a materialized table, cannot use join buffering.
3208       We allow join buffering for the remaining tables of the materialized
3209       semi-join nest.
3210     */
3211     if (tab->first_sj_inner() == tab->idx())
3212     {
3213       assert(tab->use_join_cache() == JOIN_CACHE::ALG_NONE);
3214       goto no_join_cache;
3215     }
3216     break;
3217 
3218   case SJ_OPT_DUPS_WEEDOUT:
3219     // This strategy allows the same join buffering as a regular join would.
3220   case SJ_OPT_NONE:
3221     break;
3222   }
3223 
3224   /*
3225     The following code prevents use of join buffering when there is an
3226     outer join operation and first match semi-join strategy is used, because:
3227 
3228     Outer join needs a "match flag" to track that a row should be
3229     NULL-complemented, such flag being attached to first inner table's cache
3230     (tracks whether the cached row from outer table got a match, in which case
3231     no NULL-complemented row is needed).
3232 
3233     FirstMatch also needs a "match flag", such flag is attached to sj inner
3234     table's cache (tracks whether the cached row from outer table already got
3235     a first match in the sj-inner table, in which case we don't need to join
3236     this cached row again)
3237      - but a row in a cache has only one "match flag"
3238      - so if "sj inner table"=="first inner", there is a problem.
3239   */
3240   if (tab_sj_strategy == SJ_OPT_FIRST_MATCH &&
3241       tab->is_inner_table_of_outer_join())
3242     goto no_join_cache;
3243 
3244   switch (tab->type()) {
3245   case JT_ALL:
3246   case JT_INDEX_SCAN:
3247   case JT_RANGE:
3248   case JT_INDEX_MERGE:
3249     if (!bnl_on)
3250     {
3251       assert(tab->use_join_cache() == JOIN_CACHE::ALG_NONE);
3252       goto no_join_cache;
3253     }
3254 
3255     tab->set_use_join_cache(JOIN_CACHE::ALG_BNL);
3256     return false;
3257   case JT_SYSTEM:
3258   case JT_CONST:
3259   case JT_REF:
3260   case JT_EQ_REF:
3261     if (!bka_on)
3262     {
3263       assert(tab->use_join_cache() == JOIN_CACHE::ALG_NONE);
3264       goto no_join_cache;
3265     }
3266 
3267     /*
3268       Disable BKA for materializable derived tables/views as they aren't
3269       instantiated yet.
3270     */
3271     if (tab->table_ref->uses_materialization())
3272       goto no_join_cache;
3273 
3274     /*
3275       Can't use BKA for subquery if dealing with a subquery that can
3276       turn a ref access into a "full scan on NULL key" table scan.
3277 
3278       @see Item_in_optimizer::val_int()
3279       @see subselect_single_select_engine::exec()
3280       @see TABLE_REF::cond_guards
3281       @see push_index_cond()
3282 
3283       @todo: This choice to not use BKA should be done before making
3284       cost estimates, e.g. in set_join_buffer_properties(). That
3285       happens before cond guards are set up, so instead of doing the
3286       check below, BKA should be disabled if
3287        - We are in an IN subquery, and
3288        - The IN predicate is not a top_level_item, and
3289        - The left_expr of the IN predicate may contain NULL values
3290          (left_expr->maybe_null)
3291     */
3292     if (tab->has_guarded_conds())
3293       goto no_join_cache;
3294 
3295     if (tab->table()->covering_keys.is_set(tab->ref().key))
3296       join_cache_flags|= HA_MRR_INDEX_ONLY;
3297     rows= tab->table()->file->multi_range_read_info(tab->ref().key, 10, 20,
3298                                                   &bufsz,
3299                                                   &join_cache_flags, &cost);
3300     /*
3301       Cannot use BKA/BKA_UNIQUE if
3302       1. MRR scan cannot be performed, or
3303       2. MRR default implementation is used
3304       Cannot use BKA if
3305       3. HA_MRR_NO_ASSOCIATION flag is set
3306     */
3307     if ((rows == HA_POS_ERROR) ||                               // 1
3308         (join_cache_flags & HA_MRR_USE_DEFAULT_IMPL) ||    // 2
3309         ((join_cache_flags & HA_MRR_NO_ASSOCIATION) &&     // 3
3310          !use_bka_unique))
3311       goto no_join_cache;
3312 
3313     if (use_bka_unique)
3314       tab->set_use_join_cache(JOIN_CACHE::ALG_BKA_UNIQUE);
3315     else
3316       tab->set_use_join_cache(JOIN_CACHE::ALG_BKA);
3317 
3318     tab->join_cache_flags= join_cache_flags;
3319     return false;
3320   default : ;
3321   }
3322 
3323 no_join_cache:
3324   revise_cache_usage(tab);
3325   tab->set_use_join_cache(JOIN_CACHE::ALG_NONE);
3326   return false;
3327 }
3328 
3329 
/*****************************************************************************
  Perform some simple condition optimizations:
  - If there is a test 'field = const', change all references to 'field'
    into 'const'.
  - Remove all dummy tests such as 'item = item' and 'const op const'.
  - Remove all 'item IS NULL' tests when item can never be NULL.
  item->marker should be 0 for all items on entry.
  FALSE is returned in cond_value if the condition is impossible (1 = 2).
*****************************************************************************/
3338 
3339 class COND_CMP :public ilink<COND_CMP> {
3340 public:
operator new(size_t size)3341   static void *operator new(size_t size)
3342   {
3343     return sql_alloc(size);
3344   }
operator delete(void * ptr MY_ATTRIBUTE ((unused)),size_t size MY_ATTRIBUTE ((unused)))3345   static void operator delete(void *ptr MY_ATTRIBUTE((unused)),
3346                               size_t size MY_ATTRIBUTE((unused)))
3347   { TRASH(ptr, size); }
3348 
3349   Item *and_level;
3350   Item_func *cmp_func;
COND_CMP(Item * a,Item_func * b)3351   COND_CMP(Item *a,Item_func *b) :and_level(a),cmp_func(b) {}
3352 };
3353 
3354 
3355 /**
3356   Find the multiple equality predicate containing a field.
3357 
3358   The function retrieves the multiple equalities accessed through
3359   the cond_equal structure from current level and up looking for
3360   an equality containing a field. It stops retrieval as soon as the equality
3361   is found and set up inherited_fl to TRUE if it's found on upper levels.
3362 
3363   @param cond_equal          multiple equalities to search in
3364   @param item_field          field to look for
3365   @param[out] inherited_fl   set up to TRUE if multiple equality is found
3366                              on upper levels (not on current level of
3367                              cond_equal)
3368 
3369   @return
3370     - Item_equal for the found multiple equality predicate if a success;
3371     - NULL otherwise.
3372 */
3373 
find_item_equal(COND_EQUAL * cond_equal,Item_field * item_field,bool * inherited_fl)3374 Item_equal *find_item_equal(COND_EQUAL *cond_equal, Item_field *item_field,
3375                             bool *inherited_fl)
3376 {
3377   Item_equal *item= 0;
3378   bool in_upper_level= FALSE;
3379   while (cond_equal)
3380   {
3381     List_iterator_fast<Item_equal> li(cond_equal->current_level);
3382     while ((item= li++))
3383     {
3384       if (item->contains(item_field->field))
3385         goto finish;
3386     }
3387     in_upper_level= TRUE;
3388     cond_equal= cond_equal->upper_levels;
3389   }
3390   in_upper_level= FALSE;
3391 finish:
3392   *inherited_fl= in_upper_level;
3393   return item;
3394 }
3395 
3396 
3397 /**
3398   Get the best field substitution for a given field.
3399 
3400   If the field is member of a multiple equality, look up that equality
3401   and return the most appropriate field. Usually this is the equivalenced
3402   field belonging to the outer-most table in the join order, but
3403   @see Item_field::get_subst_item() for details.
3404   Otherwise, return the same field.
3405 
3406   @param item_field The field that we are seeking a substitution for.
3407   @param cond_equal multiple equalities to search in
3408 
3409   @return The substituted field.
3410 */
3411 
get_best_field(Item_field * item_field,COND_EQUAL * cond_equal)3412 Item_field *get_best_field(Item_field *item_field, COND_EQUAL *cond_equal)
3413 {
3414   bool dummy;
3415   Item_equal *item_eq= find_item_equal(cond_equal, item_field, &dummy);
3416   if (!item_eq)
3417     return item_field;
3418 
3419   return item_eq->get_subst_item(item_field);
3420 }
3421 
3422 
3423 /**
3424   Check whether an equality can be used to build multiple equalities.
3425 
3426     This function first checks whether the equality (left_item=right_item)
3427     is a simple equality i.e. one that equates a field with another field
3428     or a constant (field=field_item or field=const_item).
3429     If this is the case the function looks for a multiple equality
3430     in the lists referenced directly or indirectly by cond_equal inferring
3431     the given simple equality. If it doesn't find any, it builds a multiple
3432     equality that covers the predicate, i.e. the predicate can be inferred
3433     from this multiple equality.
3434     The built multiple equality could be obtained in such a way:
3435     create a binary  multiple equality equivalent to the predicate, then
3436     merge it, if possible, with one of old multiple equalities.
3437     This guarantees that the set of multiple equalities covering equality
3438     predicates will be minimal.
3439 
3440   EXAMPLE:
3441     For the where condition
3442     @code
3443       WHERE a=b AND b=c AND
3444             (b=2 OR f=e)
3445     @endcode
3446     the check_equality will be called for the following equality
3447     predicates a=b, b=c, b=2 and f=e.
3448     - For a=b it will be called with *cond_equal=(0,[]) and will transform
3449       *cond_equal into (0,[Item_equal(a,b)]).
3450     - For b=c it will be called with *cond_equal=(0,[Item_equal(a,b)])
3451       and will transform *cond_equal into CE=(0,[Item_equal(a,b,c)]).
3452     - For b=2 it will be called with *cond_equal=(ptr(CE),[])
3453       and will transform *cond_equal into (ptr(CE),[Item_equal(2,a,b,c)]).
3454     - For f=e it will be called with *cond_equal=(ptr(CE), [])
3455       and will transform *cond_equal into (ptr(CE),[Item_equal(f,e)]).
3456 
3457   @note
3458     Now only fields that have the same type definitions (verified by
3459     the Field::eq_def method) are placed to the same multiple equalities.
3460     Because of this some equality predicates are not eliminated and
3461     can be used in the constant propagation procedure.
    We could weaken the equality test as soon as at least one of the
    equal fields is to be equal to a constant. It would require a
    more complicated implementation: we would have to store, in the
    general case, a separate constant for each field of the multiple
    equality. But at the same time it would allow us to get rid
    of constant propagation completely: it would be done by the call
    to build_equal_items_for_cond.
3469 
3470     The implementation does not follow exactly the above rules to
3471     build a new multiple equality for the equality predicate.
3472     If it processes the equality of the form field1=field2, it
3473     looks for multiple equalities me1 containing field1 and me2 containing
3474     field2. If only one of them is found the function expands it with
3475     the lacking field. If multiple equalities for both fields are
3476     found they are merged. If both searches fail a new multiple equality
3477     containing just field1 and field2 is added to the existing
3478     multiple equalities.
3479     If the function processes the predicate of the form field1=const,
3480     it looks for a multiple equality containing field1. If found, the
3481     function checks the constant of the multiple equality. If the value
3482     is unknown, it is setup to const. Otherwise the value is compared with
3483     const and the evaluation of the equality predicate is performed.
3484     When expanding/merging equality predicates from the upper levels
3485     the function first copies them for the current level. It looks
3486     acceptable, as this happens rarely. The implementation without
3487     copying would be much more complicated.
3488 
3489   @param thd         Thread handler
3490   @param left_item   left term of the equality to be checked
3491   @param right_item  right term of the equality to be checked
3492   @param item        equality item if the equality originates from a condition
3493                      predicate, 0 if the equality is the result of row
3494                      elimination
3495   @param cond_equal  multiple equalities that must hold together with the
3496                      equality
3497   @param[out] simple_equality
3498                      true  if the predicate is a simple equality predicate
3499                            to be used for building multiple equalities
3500                      false otherwise
3501 
3502   @returns false if success, true if error
3503 */
3504 
static bool check_simple_equality(THD *thd,
                                  Item *left_item, Item *right_item,
                                  Item *item, COND_EQUAL *cond_equal,
                                  bool *simple_equality)
{
  /*
    Default answer: "not a simple equality". Every "return false" below that
    is reached without setting the flag again reports exactly that, without
    signalling an error.
  */
  *simple_equality= false;

  /*
    A view reference is replaced by the item it refers to (real_item()),
    unless the reference has depended_from set, in which case the predicate
    is left untouched.
  */
  if (left_item->type() == Item::REF_ITEM &&
      down_cast<Item_ref *>(left_item)->ref_type() == Item_ref::VIEW_REF)
  {
    if (down_cast<Item_ref *>(left_item)->depended_from)
      return false;
    left_item= left_item->real_item();
  }
  if (right_item->type() == Item::REF_ITEM &&
      down_cast<Item_ref *>(right_item)->ref_type() == Item_ref::VIEW_REF)
  {
    if (down_cast<Item_ref *>(right_item)->depended_from)
      return false;
    right_item= right_item->real_item();
  }
  Item_field *left_item_field, *right_item_field;

  /*
    The two assignments inside the condition bind the typed pointers once
    both type checks have passed; fields with depended_from set are not
    handled by this branch.
  */
  if (left_item->type() == Item::FIELD_ITEM &&
      right_item->type() == Item::FIELD_ITEM &&
      (left_item_field= down_cast<Item_field *>(left_item)) &&
      (right_item_field= down_cast<Item_field *>(right_item)) &&
      !left_item_field->depended_from &&
      !right_item_field->depended_from)
  {
    /* A predicate of the form field1=field2 is processed */

    Field *const left_field= left_item_field->field;
    Field *const right_field= right_item_field->field;

    /* Only fields with the same type definition may share an equality */
    if (!left_field->eq_def(right_field))
      return false;

    /* Search for multiple equalities containing field1 and/or field2 */
    bool left_copyfl, right_copyfl;
    Item_equal *left_item_equal=
               find_item_equal(cond_equal, left_item_field, &left_copyfl);
    Item_equal *right_item_equal=
               find_item_equal(cond_equal, right_item_field, &right_copyfl);

    /* As (NULL=NULL) != TRUE we can't just remove the predicate f=f */
    if (left_field->eq(right_field)) /* f = f */
    {
      /*
        f=f is reported as simple unless the field may be NULL and is not
        a member of any multiple equality found above.
      */
      *simple_equality= !(left_field->maybe_null() && !left_item_equal);
      return false;
    }

    if (left_item_equal && left_item_equal == right_item_equal)
    {
      /*
        The equality predicate is an inference of one of the existing
        multiple equalities, i.e. the condition is already covered
        by that multiple equality.
      */
       *simple_equality= true;
       return false;
    }

    /* Copy the found multiple equalities at the current level if needed */
    if (left_copyfl)
    {
      /* left_item_equal of an upper level contains left_item */
      left_item_equal= new Item_equal(left_item_equal);
      if (left_item_equal == NULL)
        return true;
      cond_equal->current_level.push_back(left_item_equal);
    }
    if (right_copyfl)
    {
      /* right_item_equal of an upper level contains right_item */
      right_item_equal= new Item_equal(right_item_equal);
      if (right_item_equal == NULL)
        return true;
      cond_equal->current_level.push_back(right_item_equal);
    }

    /*
      From here on, any non-NULL left_item_equal/right_item_equal is an
      element of cond_equal->current_level (found there or just copied there).
    */
    if (left_item_equal)
    {
      /* left item was found in the current or one of the upper levels */
      if (! right_item_equal)
        left_item_equal->add(down_cast<Item_field *>(right_item));
      else
      {
        /* Merge two multiple equalities forming a new one */
        if (left_item_equal->merge(thd, right_item_equal))
          return true;
        /* Remove the merged multiple equality from the list */
        List_iterator<Item_equal> li(cond_equal->current_level);
        /* Advance the iterator until it stands on right_item_equal */
        while ((li++) != right_item_equal) ;
        li.remove();
      }
    }
    else
    {
      /* left item was found neither in the current nor in the upper levels */
      if (right_item_equal)
      {
        right_item_equal->add(down_cast<Item_field *>(left_item));
      }
      else
      {
        /* None of the fields was found in multiple equalities */
        Item_equal *item_equal=
          new Item_equal(down_cast<Item_field *>(left_item),
                         down_cast<Item_field *>(right_item));
        if (item_equal == NULL)
          return true;
        cond_equal->current_level.push_back(item_equal);
      }
    }
    *simple_equality= true;
    return false;
  }

  {
    /* A predicate of the form field=const/const=field is processed */
    Item *const_item= 0;
    Item_field *field_item= 0;
    /*
      field_item may be assigned by a branch whose remaining tests then fail;
      it is only used below when const_item is set, i.e. when one of the two
      branches matched completely.
    */
    if (left_item->type() == Item::FIELD_ITEM &&
        (field_item= down_cast<Item_field *>(left_item)) &&
        field_item->depended_from == NULL &&
        right_item->const_item())
    {
      const_item= right_item;
    }
    else if (right_item->type() == Item::FIELD_ITEM &&
             (field_item= down_cast<Item_field *>(right_item)) &&
             field_item->depended_from == NULL &&
             left_item->const_item())
    {
      const_item= left_item;
    }

    /* The field and the constant must have the same result type */
    if (const_item &&
        field_item->result_type() == const_item->result_type())
    {
      if (field_item->result_type() == STRING_RESULT)
      {
        const CHARSET_INFO *cs= field_item->field->charset();
        if (!item)
        {
          /*
            No originating predicate was supplied: build an equality item
            so that its comparison collation can be inspected below.
          */
          Item_func_eq *const eq_item= new Item_func_eq(left_item, right_item);
          if (eq_item == NULL || eq_item->set_cmp_func())
            return true;
          eq_item->quick_fix_field();
          item= eq_item;
        }
        /*
          For strings the equality is used only if the comparison collation
          is the field's own character set and that collation's propagate()
          hook returns true.
        */
        if ((cs != down_cast<Item_func *>(item)->compare_collation()) ||
            !cs->coll->propagate(cs, 0, 0))
          return false;
      }

      bool copyfl;
      Item_equal *item_equal= find_item_equal(cond_equal, field_item, &copyfl);
      if (copyfl)
      {
        /* Found on an upper level: work on a copy at the current level */
        item_equal= new Item_equal(item_equal);
        if (item_equal == NULL)
          return true;
        cond_equal->current_level.push_back(item_equal);
      }
      if (item_equal)
      {
        /*
          The flag cond_false will be set to 1 after this, if item_equal
          already contains a constant and its value is  not equal to
          the value of const_item.
        */
        if (item_equal->add(thd, const_item, field_item))
          return true;
      }
      else
      {
        /* The field is in no multiple equality yet: start a new one */
        item_equal= new Item_equal(const_item, field_item);
        if (item_equal == NULL)
          return true;
        cond_equal->current_level.push_back(item_equal);
      }
      *simple_equality= true;
      return false;
    }
  }
  /* Neither field=field nor field=const: not a simple equality */
  return false;
}
3694 
3695 
3696 /**
3697   Convert row equalities into a conjunction of regular equalities.
3698 
3699     The function converts a row equality of the form (E1,...,En)=(E'1,...,E'n)
3700     into a list of equalities E1=E'1,...,En=E'n. For each of these equalities
3701     Ei=E'i the function checks whether it is a simple equality or a row
3702     equality. If it is a simple equality it is used to expand multiple
3703     equalities of cond_equal. If it is a row equality it converted to a
3704     sequence of equalities between row elements. If Ei=E'i is neither a
3705     simple equality nor a row equality the item for this predicate is added
3706     to eq_list.
3707 
3708   @param thd        thread handle
3709   @param left_row   left term of the row equality to be processed
3710   @param right_row  right term of the row equality to be processed
3711   @param cond_equal multiple equalities that must hold together with the
3712                     predicate
3713   @param eq_list    results of conversions of row equalities that are not
3714                     simple enough to form multiple equalities
3715   @param[out] simple_equality
3716                     true if the row equality is composed of only
3717                     simple equalities.
3718 
3719   @returns false if conversion succeeded, true if any error.
3720 */
3721 
check_row_equality(THD * thd,Item * left_row,Item_row * right_row,COND_EQUAL * cond_equal,List<Item> * eq_list,bool * simple_equality)3722 static bool check_row_equality(THD *thd, Item *left_row, Item_row *right_row,
3723                                COND_EQUAL *cond_equal, List<Item>* eq_list,
3724                                bool *simple_equality)
3725 {
3726   *simple_equality= false;
3727   uint n= left_row->cols();
3728   for (uint i= 0 ; i < n; i++)
3729   {
3730     bool is_converted;
3731     Item *left_item= left_row->element_index(i);
3732     Item *right_item= right_row->element_index(i);
3733     if (left_item->type() == Item::ROW_ITEM &&
3734         right_item->type() == Item::ROW_ITEM)
3735     {
3736       if (check_row_equality(thd,
3737                              down_cast<Item_row *>(left_item),
3738                              down_cast<Item_row *>(right_item),
3739                              cond_equal, eq_list, &is_converted))
3740         return true;
3741       if (!is_converted)
3742         thd->lex->current_select()->cond_count++;
3743     }
3744     else
3745     {
3746       if (check_simple_equality(thd, left_item, right_item, 0, cond_equal,
3747                                 &is_converted))
3748         return true;
3749       thd->lex->current_select()->cond_count++;
3750     }
3751 
3752     if (!is_converted)
3753     {
3754       Item_func_eq *const eq_item= new Item_func_eq(left_item, right_item);
3755       if (eq_item == NULL)
3756         return true;
3757       if (eq_item->set_cmp_func())
3758       {
3759         // Failed to create cmp func -> not only simple equalitities
3760         return true;
3761       }
3762       eq_item->quick_fix_field();
3763       eq_list->push_back(eq_item);
3764     }
3765   }
3766   *simple_equality= true;
3767   return false;
3768 }
3769 
3770 
3771 /**
3772   Eliminate row equalities and form multiple equalities predicates.
3773 
3774     This function checks whether the item is a simple equality
3775     i.e. the one that equates a field with another field or a constant
3776     (field=field_item or field=constant_item), or, a row equality.
3777     For a simple equality the function looks for a multiple equality
3778     in the lists referenced directly or indirectly by cond_equal inferring
3779     the given simple equality. If it doesn't find any, it builds/expands
3780     multiple equality that covers the predicate.
3781     Row equalities are eliminated substituted for conjunctive regular
3782     equalities which are treated in the same way as original equality
3783     predicates.
3784 
3785   @param thd        thread handle
3786   @param item       predicate to process
3787   @param cond_equal multiple equalities that must hold together with the
3788                     predicate
3789   @param eq_list    results of conversions of row equalities that are not
3790                     simple enough to form multiple equalities
3791   @param[out] equality
3792                     true if re-writing rules have been applied
3793                     false otherwise, i.e.
3794                       if the predicate is not an equality, or
3795                       if the equality is neither a simple nor a row equality
3796 
3797   @returns false if success, true if error
3798 
3799   @note If the equality was created by IN->EXISTS, it may be removed later by
3800   subquery materialization. So we don't mix this possibly temporary equality
3801   with others; if we let it go into a multiple-equality (Item_equal), then we
3802   could not remove it later. There is however an exception: if the outer
3803   expression is a constant, it is safe to leave the equality even in
3804   materialization; all it can do is preventing NULL/FALSE distinction but if
3805   such distinction mattered the equality would be in a triggered condition so
3806   we would not come to this function. And injecting constants is good because
3807   it makes the materialized table smaller.
3808 */
3809 
check_equality(THD * thd,Item * item,COND_EQUAL * cond_equal,List<Item> * eq_list,bool * equality)3810 static bool check_equality(THD *thd, Item *item, COND_EQUAL *cond_equal,
3811                            List<Item> *eq_list, bool *equality)
3812 {
3813   *equality= false;
3814   Item_func *item_func;
3815   if (item->type() == Item::FUNC_ITEM &&
3816       (item_func= down_cast<Item_func *>(item))->functype() ==
3817       Item_func::EQ_FUNC)
3818   {
3819     Item *left_item= item_func->arguments()[0];
3820     Item *right_item= item_func->arguments()[1];
3821 
3822     if (item->created_by_in2exists() && !left_item->const_item())
3823       return false;                             // See note above
3824 
3825     if (left_item->type() == Item::ROW_ITEM &&
3826         right_item->type() == Item::ROW_ITEM)
3827     {
3828       thd->lex->current_select()->cond_count--;
3829       return check_row_equality(thd,
3830                                 down_cast<Item_row *>(left_item),
3831                                 down_cast<Item_row *>(right_item),
3832                                 cond_equal, eq_list, equality);
3833     }
3834     else
3835       return check_simple_equality(thd, left_item, right_item, item, cond_equal,
3836                                    equality);
3837   }
3838 
3839   return false;
3840 }
3841 
3842 
3843 /**
3844   Replace all equality predicates in a condition by multiple equality items.
3845 
3846     At each 'and' level the function detects items for equality predicates
3847     and replaces them by a set of multiple equality items of class Item_equal,
3848     taking into account inherited equalities from upper levels.
3849     If an equality predicate is used not in a conjunction it's just
3850     replaced by a multiple equality predicate.
3851     For each 'and' level the function set a pointer to the inherited
3852     multiple equalities in the cond_equal field of the associated
3853     object of the type Item_cond_and.
3854     The function also traverses the cond tree and for each field reference
3855     sets a pointer to the multiple equality item containing the field, if there
3856     is any. If this multiple equality equates fields to a constant the
3857     function replaces the field reference by the constant in the cases
3858     when the field is not of a string type or when the field reference is
3859     just an argument of a comparison predicate.
3860     The function also determines the maximum number of members in
3861     equality lists of each Item_cond_and object assigning it to
3862     thd->lex->current_select()->max_equal_elems.
3863 
  @note
    Multiple equality predicate =(f1,..fn) is equivalent to the conjunction of
    f1=f2, .., fn-1=fn. It substitutes any inference from these
    equality predicates that is equivalent to the conjunction.
    Thus, =(a1,a2,a3) can substitute for ((a1=a3) AND (a2=a3) AND (a2=a1)) as
    it is equivalent to ((a1=a2) AND (a2=a3)).
    The function always makes a substitution of all equality predicates
    occurring in a conjunction for a minimal set of multiple equality
    predicates.
    This set can be considered as a canonical representation of the
    sub-conjunction of the equality predicates.
    E.g. (t1.a=t2.b AND t2.b>5 AND t1.a=t3.c) is replaced by
    (=(t1.a,t2.b,t3.c) AND t2.b>5), not by
    (=(t1.a,t2.b) AND =(t1.a,t3.c) AND t2.b>5);
    while (t1.a=t2.b AND t2.b>5 AND t3.c=t4.d) is replaced by
    (=(t1.a,t2.b) AND =(t3.c,t4.d) AND t2.b>5),
    but if additionally =(t4.d,t2.b) is inherited, it
    will be replaced by (=(t1.a,t2.b,t3.c,t4.d) AND t2.b>5)
3881 
    The function performs the substitution in a recursive descent of
    the condition tree, passing to the next AND level a chain of multiple
    equality predicates which have been built at the upper levels.
    The Item_equal items built at the level are attached to other
    non-equality conjuncts as a sublist. The pointer to the inherited
    multiple equalities is saved in the AND condition object (Item_cond_and).
    This chain allows us, for any field reference occurrence, to easily find
    a multiple equality that must hold for this occurrence.
3890     For each AND level we do the following:
3891     - scan it for all equality predicate (=) items
3892     - join them into disjoint Item_equal() groups
3893     - process the included OR conditions recursively to do the same for
3894       lower AND levels.
3895 
3896     We need to do things in this order as lower AND levels need to know about
3897     all possible Item_equal objects in upper levels.
3898 
3899   @param thd          thread handle
3900   @param cond         condition(expression) where to make replacement
3901   @param[out] retcond returned condition
3902   @param inherited    path to all inherited multiple equality items
3903   @param do_inherit   whether or not to inherit equalities from other parts
3904                       of the condition
3905 
3906   @returns false if success, true if error
3907 */
3908 
build_equal_items_for_cond(THD * thd,Item * cond,Item ** retcond,COND_EQUAL * inherited,bool do_inherit)3909 static bool build_equal_items_for_cond(THD *thd, Item *cond, Item **retcond,
3910                                        COND_EQUAL *inherited, bool do_inherit)
3911 {
3912   Item_equal *item_equal;
3913   COND_EQUAL cond_equal;
3914   cond_equal.upper_levels= inherited;
3915 
3916   if (check_stack_overrun(thd, STACK_MIN_SIZE, NULL))
3917     return true;                          // Fatal error flag is set!
3918 
3919   const enum Item::Type cond_type= cond->type();
3920   if (cond_type == Item::COND_ITEM)
3921   {
3922     List<Item> eq_list;
3923     Item_cond *const item_cond= down_cast<Item_cond *>(cond);
3924     const bool and_level= item_cond->functype() == Item_func::COND_AND_FUNC;
3925     List<Item> *args= item_cond->argument_list();
3926 
3927     List_iterator<Item> li(*args);
3928     Item *item;
3929 
3930     if (and_level)
3931     {
3932       /*
3933          Retrieve all conjuncts of this level detecting the equality
3934          that are subject to substitution by multiple equality items and
3935          removing each such predicate from the conjunction after having
3936          found/created a multiple equality whose inference the predicate is.
3937      */
3938       while ((item= li++))
3939       {
3940         /*
3941           PS/SP note: we can safely remove a node from AND-OR
3942           structure here because it's restored before each
3943           re-execution of any prepared statement/stored procedure.
3944         */
3945         bool equality;
3946         if (check_equality(thd, item, &cond_equal, &eq_list, &equality))
3947           return true;
3948         if (equality)
3949           li.remove();
3950       }
3951 
3952       /*
3953         Check if we eliminated all the predicates of the level, e.g.
3954         (a=a AND b=b AND a=a).
3955       */
3956       if (!args->elements &&
3957           !cond_equal.current_level.elements &&
3958           !eq_list.elements)
3959       {
3960         *retcond= new Item_int((longlong) 1, 1);
3961         return *retcond == NULL;
3962       }
3963 
3964       List_iterator_fast<Item_equal> it(cond_equal.current_level);
3965       while ((item_equal= it++))
3966       {
3967         item_equal->fix_length_and_dec();
3968         item_equal->update_used_tables();
3969         set_if_bigger(thd->lex->current_select()->max_equal_elems,
3970                       item_equal->members());
3971       }
3972 
3973       Item_cond_and *const item_cond_and= down_cast<Item_cond_and *>(cond);
3974       item_cond_and->cond_equal= cond_equal;
3975       inherited= &item_cond_and->cond_equal;
3976     }
3977     /*
3978        Make replacement of equality predicates for lower levels
3979        of the condition expression.
3980     */
3981     li.rewind();
3982     while ((item= li++))
3983     {
3984       Item *new_item;
3985       if (build_equal_items_for_cond(thd, item, &new_item, inherited,
3986                                      do_inherit))
3987         return true;
3988       if (new_item != item)
3989       {
3990         /* This replacement happens only for standalone equalities */
3991         /*
3992           This is ok with PS/SP as the replacement is done for
3993           arguments of an AND/OR item, which are restored for each
3994           execution of PS/SP.
3995         */
3996         li.replace(new_item);
3997       }
3998     }
3999     if (and_level)
4000     {
4001       args->concat(&eq_list);
4002       args->concat((List<Item> *)&cond_equal.current_level);
4003     }
4004   }
4005   else if (cond->type() == Item::FUNC_ITEM)
4006   {
4007     List<Item> eq_list;
4008     /*
4009       If an equality predicate forms the whole and level,
4010       we call it standalone equality and it's processed here.
4011       E.g. in the following where condition
4012       WHERE a=5 AND (b=5 or a=c)
4013       (b=5) and (a=c) are standalone equalities.
4014       In general we can't leave alone standalone eqalities:
4015       for WHERE a=b AND c=d AND (b=c OR d=5)
4016       b=c is replaced by =(a,b,c,d).
4017      */
4018     bool equality;
4019     if (check_equality(thd, cond, &cond_equal, &eq_list, &equality))
4020       return true;
4021     if (equality)
4022     {
4023       int n= cond_equal.current_level.elements + eq_list.elements;
4024       if (n == 0)
4025       {
4026         *retcond= new Item_int((longlong) 1,1);
4027         return *retcond == NULL;
4028       }
4029       else if (n == 1)
4030       {
4031         if ((item_equal= cond_equal.current_level.pop()))
4032         {
4033           item_equal->fix_length_and_dec();
4034           item_equal->update_used_tables();
4035           set_if_bigger(thd->lex->current_select()->max_equal_elems,
4036                         item_equal->members());
4037           *retcond= item_equal;
4038           return false;
4039 	}
4040 
4041         *retcond= eq_list.pop();
4042         return false;
4043       }
4044       else
4045       {
4046         /*
4047           Here a new AND level must be created. It can happen only
4048           when a row equality is processed as a standalone predicate.
4049 	*/
4050         Item_cond_and *and_cond= new Item_cond_and(eq_list);
4051         if (and_cond == NULL)
4052           return true;
4053 
4054         and_cond->quick_fix_field();
4055         List<Item> *args= and_cond->argument_list();
4056         List_iterator_fast<Item_equal> it(cond_equal.current_level);
4057         while ((item_equal= it++))
4058         {
4059           item_equal->fix_length_and_dec();
4060           item_equal->update_used_tables();
4061           set_if_bigger(thd->lex->current_select()->max_equal_elems,
4062                         item_equal->members());
4063         }
4064         and_cond->cond_equal= cond_equal;
4065         args->concat((List<Item> *)&cond_equal.current_level);
4066 
4067         *retcond= and_cond;
4068         return false;
4069       }
4070     }
4071 
4072     if (do_inherit)
4073     {
4074       /*
4075         For each field reference in cond, not from equal item predicates,
4076         set a pointer to the multiple equality it belongs to (if there is any)
4077         as soon the field is not of a string type or the field reference is
4078         an argument of a comparison predicate.
4079       */
4080       uchar *is_subst_valid= (uchar *) 1;
4081       cond= cond->compile(&Item::subst_argument_checker,
4082                           &is_subst_valid,
4083                           &Item::equal_fields_propagator,
4084                           (uchar *) inherited);
4085       if (cond == NULL)
4086         return true;
4087     }
4088     cond->update_used_tables();
4089   }
4090   *retcond= cond;
4091   return false;
4092 }
4093 
4094 
4095 /**
4096   Build multiple equalities for a WHERE condition and all join conditions that
4097   inherit these multiple equalities.
4098 
4099     The function first applies the build_equal_items_for_cond function
4100     to build all multiple equalities for condition cond utilizing equalities
4101     referred through the parameter inherited. The extended set of
4102     equalities is returned in the structure referred by the cond_equal_ref
4103     parameter. After this the function calls itself recursively for
4104     all join conditions whose direct references can be found in join_list
4105     and who inherit directly the multiple equalities just having built.
4106 
4107   @note
4108     The join condition used in an outer join operation inherits all equalities
4109     from the join condition of the embedding join, if there is any, or
4110     otherwise - from the where condition.
4111     This fact is not obvious, but presumably can be proved.
4112     Consider the following query:
4113     @code
4114       SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t1.a=t3.a AND t2.a=t4.a
4115         WHERE t1.a=t2.a;
4116     @endcode
4117     If the join condition in the query inherits =(t1.a,t2.a), then we
4118     can build the multiple equality =(t1.a,t2.a,t3.a,t4.a) that infers
4119     the equality t3.a=t4.a. Although the join condition
4120     t1.a=t3.a AND t2.a=t4.a AND t3.a=t4.a is not equivalent to the one
4121     in the query the latter can be replaced by the former: the new query
4122     will return the same result set as the original one.
4123 
4124     Interesting that multiple equality =(t1.a,t2.a,t3.a,t4.a) allows us
4125     to use t1.a=t3.a AND t3.a=t4.a under the join condition:
4126     @code
4127       SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t1.a=t3.a AND t3.a=t4.a
4128         WHERE t1.a=t2.a
4129     @endcode
4130     This query equivalent to:
4131     @code
4132       SELECT * FROM (t1 LEFT JOIN (t3,t4) ON t1.a=t3.a AND t3.a=t4.a),t2
4133         WHERE t1.a=t2.a
4134     @endcode
4135     Similarly the original query can be rewritten to the query:
4136     @code
4137       SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t2.a=t4.a AND t3.a=t4.a
4138         WHERE t1.a=t2.a
4139     @endcode
4140     that is equivalent to:
4141     @code
4142       SELECT * FROM (t2 LEFT JOIN (t3,t4)ON t2.a=t4.a AND t3.a=t4.a), t1
4143         WHERE t1.a=t2.a
4144     @endcode
4145     Thus, applying equalities from the where condition we basically
4146     can get more freedom in performing join operations.
4147     Although we don't use this property now, it probably makes sense to use
4148     it in the future.
4149 
4150   @param thd		     Thread handler
4151   @param cond                condition to build the multiple equalities for
4152   @param[out] retcond        Returned condition
4153   @param inherited           path to all inherited multiple equality items
4154   @param do_inherit          whether or not to inherit equalities from other
4155                              parts of the condition
4156   @param join_list           list of join tables that the condition refers to
4157   @param[out] cond_equal_ref pointer to the structure to place built
4158                              equalities in
4159 
4160   @returns false if success, true if error
4161 */
4162 
build_equal_items(THD * thd,Item * cond,Item ** retcond,COND_EQUAL * inherited,bool do_inherit,List<TABLE_LIST> * join_list,COND_EQUAL ** cond_equal_ref)4163 bool build_equal_items(THD *thd, Item *cond, Item **retcond,
4164                        COND_EQUAL *inherited, bool do_inherit,
4165                        List<TABLE_LIST> *join_list,
4166                        COND_EQUAL **cond_equal_ref)
4167 {
4168   COND_EQUAL *cond_equal= 0;
4169 
4170   if (cond)
4171   {
4172     if (build_equal_items_for_cond(thd, cond, &cond, inherited, do_inherit))
4173       return true;
4174     cond->update_used_tables();
4175     const enum Item::Type cond_type= cond->type();
4176     if (cond_type == Item::COND_ITEM &&
4177         down_cast<Item_cond *>(cond)->functype() == Item_func::COND_AND_FUNC)
4178       cond_equal= &down_cast<Item_cond_and *>(cond)->cond_equal;
4179     else if (cond_type == Item::FUNC_ITEM &&
4180          down_cast<Item_func *>(cond)->functype() == Item_func::MULT_EQUAL_FUNC)
4181     {
4182       cond_equal= new COND_EQUAL;
4183       if (cond_equal == NULL)
4184         return true;
4185       cond_equal->current_level.push_back(down_cast<Item_equal *>(cond));
4186     }
4187   }
4188   if (cond_equal)
4189   {
4190     cond_equal->upper_levels= inherited;
4191     inherited= cond_equal;
4192   }
4193   *cond_equal_ref= cond_equal;
4194 
4195   if (join_list)
4196   {
4197     TABLE_LIST *table;
4198     List_iterator<TABLE_LIST> li(*join_list);
4199 
4200     while ((table= li++))
4201     {
4202       if (table->join_cond_optim())
4203       {
4204         List<TABLE_LIST> *nested_join_list= table->nested_join ?
4205           &table->nested_join->join_list : NULL;
4206         Item *join_cond;
4207         if (build_equal_items(thd, table->join_cond_optim(), &join_cond,
4208                               inherited, do_inherit,
4209                               nested_join_list, &table->cond_equal))
4210           return true;
4211         table->set_join_cond_optim(join_cond);
4212       }
4213     }
4214   }
4215 
4216   *retcond= cond;
4217   return false;
4218 }
4219 
4220 
4221 /**
4222   Compare field items by table order in the execution plan.
4223 
4224     field1 considered as better than field2 if the table containing
4225     field1 is accessed earlier than the table containing field2.
4226     The function finds out what of two fields is better according
4227     this criteria.
4228 
4229   @param field1          first field item to compare
4230   @param field2          second field item to compare
4231   @param table_join_idx  index to tables determining table order
4232 
4233   @retval
4234    -1  if field1 is better than field2
4235   @retval
4236     1  if field2 is better than field1
4237   @retval
4238     0  otherwise
4239 */
4240 
compare_fields_by_table_order(Item_field * field1,Item_field * field2,void * table_join_idx)4241 static int compare_fields_by_table_order(Item_field *field1,
4242                                   Item_field *field2,
4243                                   void *table_join_idx)
4244 {
4245   int cmp= 0;
4246   bool outer_ref= 0;
4247   if (field1->used_tables() & OUTER_REF_TABLE_BIT)
4248   {
4249     outer_ref= 1;
4250     cmp= -1;
4251   }
4252   if (field2->used_tables() & OUTER_REF_TABLE_BIT)
4253   {
4254     outer_ref= 1;
4255     cmp++;
4256   }
4257   if (outer_ref)
4258     return cmp;
4259   JOIN_TAB **idx= (JOIN_TAB **) table_join_idx;
4260 
4261   /*
4262     idx is NULL if this function was not called from JOIN::optimize()
4263     but from e.g. mysql_delete() or mysql_update(). In these cases
4264     there is only one table and both fields belong to it. Example
4265     condition where this is the case: t1.fld1=t1.fld2
4266   */
4267   if (!idx)
4268     return 0;
4269 
4270   // Locate JOIN_TABs thanks to table_join_idx, then compare their index.
4271   cmp= idx[field1->table_ref->tableno()]->idx() -
4272        idx[field2->table_ref->tableno()]->idx();
4273   return cmp < 0 ? -1 : (cmp ? 1 : 0);
4274 }
4275 
4276 
4277 /**
4278   Generate minimal set of simple equalities equivalent to a multiple equality.
4279 
4280     The function retrieves the fields of the multiple equality item
4281     item_equal and  for each field f:
4282     - if item_equal contains const it generates the equality f=const_item;
4283     - otherwise, if f is not the first field, generates the equality
4284       f=item_equal->get_first().
4285     All generated equality are added to the cond conjunction.
4286 
4287   @param cond            condition to add the generated equality to
4288   @param upper_levels    structure to access multiple equality of upper levels
4289   @param item_equal      multiple equality to generate simple equality from
4290 
4291   @note
4292     Before generating an equality function checks that it has not
4293     been generated for multiple equalities of the upper levels.
4294     E.g. for the following where condition
4295     WHERE a=5 AND ((a=b AND b=c) OR  c>4)
4296     the upper level AND condition will contain =(5,a),
4297     while the lower level AND condition will contain =(5,a,b,c).
4298     When splitting =(5,a,b,c) into a separate equality predicates
4299     we should omit 5=a, as we have it already in the upper level.
4300     The following where condition gives us a more complicated case:
4301     WHERE t1.a=t2.b AND t3.c=t4.d AND (t2.b=t3.c OR t4.e>5 ...) AND ...
4302     Given the tables are accessed in the order t1->t2->t3->t4 for
4303     the selected query execution plan the lower level multiple
4304     equality =(t1.a,t2.b,t3.c,t4.d) formally  should be converted to
4305     t1.a=t2.b AND t1.a=t3.c AND t1.a=t4.d. But t1.a=t2.a will be
4306     generated for the upper level. Also t3.c=t4.d will be generated there.
4307     So only t1.a=t3.c should be left in the lower level.
4308     If cond is equal to 0, then not more then one equality is generated
4309     and a pointer to it is returned as the result of the function.
4310 
4311   @return
4312     - The condition with generated simple equalities or
4313     a pointer to the simple generated equality, if success.
4314     - 0, otherwise.
4315 */
4316 
eliminate_item_equal(Item * cond,COND_EQUAL * upper_levels,Item_equal * item_equal)4317 static Item *eliminate_item_equal(Item *cond, COND_EQUAL *upper_levels,
4318                                   Item_equal *item_equal)
4319 {
4320   List<Item> eq_list;
4321   Item_func_eq *eq_item= NULL;
4322   if (((Item *) item_equal)->const_item() && !item_equal->val_int())
4323     return new Item_int((longlong) 0,1);
4324   Item *const item_const= item_equal->get_const();
4325   Item_equal_iterator it(*item_equal);
4326   if (!item_const)
4327   {
4328     /*
4329       If there is a const item, match all field items with the const item,
4330       otherwise match the second and subsequent field items with the first one:
4331     */
4332     it++;
4333   }
4334   Item_field *item_field; // Field to generate equality for.
4335   while ((item_field= it++))
4336   {
4337     /*
4338       Generate an equality of the form:
4339       item_field = some previous field in item_equal's list.
4340 
4341       First see if we really need to generate it:
4342     */
4343     Item_equal *const upper= item_field->find_item_equal(upper_levels);
4344     if (upper) // item_field is in this upper equality
4345     {
4346       if (item_const && upper->get_const())
4347         continue; // Const at both levels, no need to generate at current level
4348       /*
4349         If the upper-level multiple equality contains this item, there is no
4350         need to generate the equality, unless item_field belongs to a
4351         semi-join nest that is used for Materialization, and refers to tables
4352         that are outside of the materialized semi-join nest,
4353         As noted in Item_equal::get_subst_item(), subquery materialization
4354         does not have this problem.
4355       */
4356       JOIN_TAB *const tab= item_field->field->table->reginfo.join_tab;
4357 
4358       if (!(tab && sj_is_materialize_strategy(tab->get_sj_strategy())))
4359       {
4360         Item_field *item_match;
4361         Item_equal_iterator li(*item_equal);
4362         while ((item_match= li++) != item_field)
4363         {
4364           if (item_match->find_item_equal(upper_levels) == upper)
4365             break; // (item_match, item_field) is also in upper level equality
4366         }
4367         if (item_match != item_field)
4368           continue;
4369       }
4370     } // ... if (upper).
4371 
4372     /*
4373       item_field should be compared with the head of the multiple equality
4374       list.
4375       item_field may refer to a table that is within a semijoin materialization
4376       nest. In that case, the order of the join_tab entries may look like:
4377 
4378         ot1 ot2 <subquery> ot5 SJM(it3 it4)
4379 
4380       If we have a multiple equality
4381 
4382         (ot1.c1, ot2.c2, <subquery>.c it3.c3, it4.c4, ot5.c5),
4383 
4384       we should generate the following equalities:
4385         1. ot1.c1 = ot2.c2
4386         2. ot1.c1 = <subquery>.c
4387         3. it3.c3 = it4.c4
4388         4. ot1.c1 = ot5.c5
4389 
4390       Equalities 1) and 4) are regular equalities between two outer tables.
4391       Equality 2) is an equality that matches the outer query with a
4392       materialized temporary table. It is either performed as a lookup
4393       into the materialized table (SJM-lookup), or as a condition on the
4394       outer table (SJM-scan).
4395       Equality 3) is evaluated during semijoin materialization.
4396 
4397       If there is a const item, match against this one.
4398       Otherwise, match against the first field item in the multiple equality,
4399       unless the item is within a materialized semijoin nest, in case it will
4400       be matched against the first item within the SJM nest.
4401       @see JOIN::set_prefix_tables()
4402       @see Item_equal::get_subst_item()
4403     */
4404 
4405     Item *const head=
4406       item_const ? item_const : item_equal->get_subst_item(item_field);
4407     if (head == item_field)
4408       continue;
4409 
4410     // we have a pair, can generate 'item_field=head'
4411     if (eq_item)
4412       eq_list.push_back(eq_item);
4413 
4414     eq_item= new Item_func_eq(item_field, head);
4415     if (!eq_item || eq_item->set_cmp_func())
4416       return NULL;
4417     eq_item->quick_fix_field();
4418   } // ... while ((item_field= it++))
4419 
4420   if (!cond && !eq_list.head())
4421   {
4422     if (!eq_item)
4423       return new Item_int((longlong) 1,1);
4424     return eq_item;
4425   }
4426 
4427   if (eq_item)
4428     eq_list.push_back(eq_item);
4429   if (!cond)
4430     cond= new Item_cond_and(eq_list);
4431   else
4432   {
4433     assert(cond->type() == Item::COND_ITEM);
4434     if (eq_list.elements)
4435       ((Item_cond *) cond)->add_at_head(&eq_list);
4436   }
4437 
4438   cond->quick_fix_field();
4439   cond->update_used_tables();
4440 
4441   return cond;
4442 }
4443 
4444 
4445 /**
4446   Substitute every field reference in a condition by the best equal field
4447   and eliminate all multiple equality predicates.
4448 
4449     The function retrieves the cond condition and for each encountered
4450     multiple equality predicate it sorts the field references in it
4451     according to the order of tables specified by the table_join_idx
4452     parameter. Then it eliminates the multiple equality predicate it
4453     replacing it by the conjunction of simple equality predicates
4454     equating every field from the multiple equality to the first
4455     field in it, or to the constant, if there is any.
4456     After this the function retrieves all other conjuncted
4457     predicates substitute every field reference by the field reference
4458     to the first equal field or equal constant if there are any.
4459 
4460   @param cond            condition to process
4461   @param cond_equal      multiple equalities to take into consideration
4462   @param table_join_idx  index to tables determining field preference
4463 
4464   @note
4465     At the first glance full sort of fields in multiple equality
4466     seems to be an overkill. Yet it's not the case due to possible
4467     new fields in multiple equality item of lower levels. We want
4468     the order in them to comply with the order of upper levels.
4469 
4470   @return
4471     The transformed condition, or NULL in case of error
4472 */
4473 
substitute_for_best_equal_field(Item * cond,COND_EQUAL * cond_equal,void * table_join_idx)4474 Item* substitute_for_best_equal_field(Item *cond,
4475                                       COND_EQUAL *cond_equal,
4476                                       void *table_join_idx)
4477 {
4478   Item_equal *item_equal;
4479 
4480   if (cond->type() == Item::COND_ITEM)
4481   {
4482     List<Item> *cond_list= ((Item_cond*) cond)->argument_list();
4483 
4484     bool and_level= ((Item_cond*) cond)->functype() ==
4485                       Item_func::COND_AND_FUNC;
4486     if (and_level)
4487     {
4488       cond_equal= &((Item_cond_and *) cond)->cond_equal;
4489       cond_list->disjoin((List<Item> *) &cond_equal->current_level);
4490 
4491       List_iterator_fast<Item_equal> it(cond_equal->current_level);
4492       while ((item_equal= it++))
4493       {
4494         item_equal->sort(&compare_fields_by_table_order, table_join_idx);
4495       }
4496     }
4497 
4498     List_iterator<Item> li(*cond_list);
4499     Item *item;
4500     while ((item= li++))
4501     {
4502       Item *new_item= substitute_for_best_equal_field(item, cond_equal,
4503                                                       table_join_idx);
4504       if (new_item == NULL)
4505         return NULL;
4506       /*
4507         This works OK with PS/SP re-execution as changes are made to
4508         the arguments of AND/OR items only
4509       */
4510       if (new_item != item)
4511         li.replace(new_item);
4512     }
4513 
4514     if (and_level)
4515     {
4516       List_iterator_fast<Item_equal> it(cond_equal->current_level);
4517       while ((item_equal= it++))
4518       {
4519         cond= eliminate_item_equal(cond, cond_equal->upper_levels, item_equal);
4520         if (cond == NULL)
4521           return NULL;
4522         // This occurs when eliminate_item_equal() founds that cond is
4523         // always false and substitutes it with Item_int 0.
4524         // Due to this, value of item_equal will be 0, so just return it.
4525         if (cond->type() != Item::COND_ITEM)
4526           break;
4527       }
4528     }
4529     if (cond->type() == Item::COND_ITEM &&
4530         !((Item_cond*)cond)->argument_list()->elements)
4531       cond= new Item_int((int32)cond->val_bool());
4532 
4533   }
4534   else if (cond->type() == Item::FUNC_ITEM &&
4535            ((Item_cond*) cond)->functype() == Item_func::MULT_EQUAL_FUNC)
4536   {
4537     item_equal= (Item_equal *) cond;
4538     item_equal->sort(&compare_fields_by_table_order, table_join_idx);
4539     if (cond_equal && cond_equal->current_level.head() == item_equal)
4540       cond_equal= cond_equal->upper_levels;
4541     return eliminate_item_equal(0, cond_equal, item_equal);
4542   }
4543   else
4544     cond->transform(&Item::replace_equal_field, 0);
4545   return cond;
4546 }
4547 
4548 
/**
  Change field = field to field = const for each found field = const in the
  and_level.

  The condition is walked recursively. In every two-argument comparison
  that has an operand equal to field, that operand is replaced by a clone of
  value.

  @param thd        Thread handler
  @param save_list  List that receives a COND_CMP entry for every rewritten
                    equality (= or <=>) whose other operand is not constant
                    and whose and_father differs from the predicate itself
  @param and_father Item stored in the COND_CMP entries. Recursive calls pass
                    the enclosing item for arguments of an AND, and the
                    argument itself otherwise
  @param cond       Condition where fields are replaced with constant values
  @param field      The field that will be substituted
  @param value      The substitution value

  @returns false if success, true if error
*/

static bool
change_cond_ref_to_const(THD *thd, I_List<COND_CMP> *save_list,
                         Item *and_father, Item *cond,
                         Item *field, Item *value)
{
  if (cond->type() == Item::COND_ITEM)
  {
    // AND/OR: descend into every argument.
    Item_cond *const item_cond= down_cast<Item_cond *>(cond);
    bool and_level= item_cond->functype() == Item_func::COND_AND_FUNC;
    List_iterator<Item> li(*item_cond->argument_list());
    Item *item;
    while ((item=li++))
    {
      if (change_cond_ref_to_const(thd, save_list,
                                   and_level ? cond : item,
                                   item, field, value))
        return true;
    }
    return false;
  }
  if (cond->eq_cmp_result() == Item::COND_OK)
    return false;                // Not a boolean function

  // NOTE(review): the cast assumes that every item passing the check above
  // is an Item_bool_func2 - confirm against eq_cmp_result() overrides.
  Item_bool_func2 *func= down_cast<Item_bool_func2 *>(cond);
  Item **args= func->arguments();
  Item *left_item=  args[0];
  Item *right_item= args[1];
  Item_func::Functype functype= func->functype();

  /*
    Case 1: field is the right operand. The predicate whose left operand is
    the value item itself is left alone, and for string comparisons the
    substitution is only done if the collations match.
  */
  if (right_item->eq(field,0) && left_item != value &&
      right_item->cmp_context == field->cmp_context &&
      (left_item->result_type() != STRING_RESULT ||
       value->result_type() != STRING_RESULT ||
       left_item->collation.collation == value->collation.collation))
  {
    Item *const clone= value->clone_item();
    if (thd->is_error())
      return true;

    // No clone available and no error raised: keep the predicate as it is.
    if (clone == NULL)
      return false;

    // The clone takes over the collation of the operand it replaces.
    clone->collation.set(right_item->collation);
    thd->change_item_tree(args + 1, clone);
    func->update_used_tables();
    if ((functype == Item_func::EQ_FUNC ||
         functype == Item_func::EQUAL_FUNC) &&
        and_father != cond && !left_item->const_item())
    {
      /*
        The predicate now has the form <non-const> = <const>. Mark it, so
        that propagate_cond_constants() skips it, and record it in save_list
        together with its and_father.
      */
      cond->marker=1;
      COND_CMP *const cond_cmp= new COND_CMP(and_father,func);
      if (cond_cmp == NULL)
        return true;

      save_list->push_back(cond_cmp);

    }
    // The operands changed, so the comparator must be set up again.
    if (func->set_cmp_func())
      return true;
  }
  /*
    Case 2: field is the left operand; mirror image of case 1.
  */
  else if (left_item->eq(field,0) && right_item != value &&
           left_item->cmp_context == field->cmp_context &&
           (right_item->result_type() != STRING_RESULT ||
            value->result_type() != STRING_RESULT ||
            right_item->collation.collation == value->collation.collation))
  {
    Item *const clone= value->clone_item();
    if (thd->is_error())
      return true;

    // No clone available and no error raised: keep the predicate as it is.
    if (clone == NULL)
      return false;

    // The clone takes over the collation of the operand it replaces.
    clone->collation.set(left_item->collation);
    thd->change_item_tree(args, clone);
    value= clone;
    func->update_used_tables();
    if ((functype == Item_func::EQ_FUNC ||
         functype == Item_func::EQUAL_FUNC) &&
        and_father != cond && !right_item->const_item())
    {
      /*
        Swap the operands so that the non-constant one ends up in args[0]
        and the constant clone in args[1], the same layout as in case 1.
      */
      args[0]= args[1];                       // For easy check
      thd->change_item_tree(args + 1, value);
      cond->marker=1;
      COND_CMP *const cond_cmp= new COND_CMP(and_father,func);
      if (cond_cmp == NULL)
        return true;

      save_list->push_back(cond_cmp);
    }
    // The operands changed, so the comparator must be set up again.
    if (func->set_cmp_func())
      return true;
  }
  return false;
}
4658 
4659 /**
4660   Propagate constant values in a condition
4661 
4662   @param thd        Thread handler
4663   @param save_list
4664   @param and_father
4665   @param cond       Condition for which constant values are propagated
4666 
4667   @returns false if success, true if error
4668 */
4669 static bool
propagate_cond_constants(THD * thd,I_List<COND_CMP> * save_list,Item * and_father,Item * cond)4670 propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list,
4671                          Item *and_father, Item *cond)
4672 {
4673   if (cond->type() == Item::COND_ITEM)
4674   {
4675     Item_cond *const item_cond= down_cast<Item_cond *>(cond);
4676     bool and_level= item_cond->functype() == Item_func::COND_AND_FUNC;
4677     List_iterator_fast<Item> li(*item_cond->argument_list());
4678     Item *item;
4679     I_List<COND_CMP> save;
4680     while ((item=li++))
4681     {
4682       if (propagate_cond_constants(thd, &save, and_level ? cond : item, item))
4683         return true;
4684     }
4685     if (and_level)
4686     {						// Handle other found items
4687       I_List_iterator<COND_CMP> cond_itr(save);
4688       COND_CMP *cond_cmp;
4689       while ((cond_cmp= cond_itr++))
4690       {
4691         Item **args= cond_cmp->cmp_func->arguments();
4692         if (!args[0]->const_item() &&
4693             change_cond_ref_to_const(thd, &save, cond_cmp->and_level,
4694                                      cond_cmp->and_level, args[0], args[1]))
4695           return true;
4696       }
4697     }
4698   }
4699   else if (and_father != cond && !cond->marker)		// In a AND group
4700   {
4701     Item_func *func;
4702     if (cond->type() == Item::FUNC_ITEM &&
4703         (func= down_cast<Item_func *>(cond)) &&
4704 	(func->functype() == Item_func::EQ_FUNC ||
4705 	 func->functype() == Item_func::EQUAL_FUNC))
4706     {
4707       Item **args= func->arguments();
4708       bool left_const= args[0]->const_item();
4709       bool right_const= args[1]->const_item();
4710       if (!(left_const && right_const) &&
4711           args[0]->result_type() == args[1]->result_type())
4712       {
4713 	if (right_const)
4714 	{
4715           if (resolve_const_item(thd, &args[1], args[0]))
4716             return true;
4717 	  func->update_used_tables();
4718           if (change_cond_ref_to_const(thd, save_list, and_father, and_father,
4719                                        args[0], args[1]))
4720             return true;
4721 	}
4722 	else if (left_const)
4723 	{
4724           if (resolve_const_item(thd, &args[0], args[1]))
4725             return true;
4726 	  func->update_used_tables();
4727           if (change_cond_ref_to_const(thd, save_list, and_father, and_father,
4728                                        args[1], args[0]))
4729             return true;
4730 	}
4731       }
4732     }
4733   }
4734 
4735   return false;
4736 }
4737 
4738 
4739 /**
4740   Assign each nested join structure a bit in nested_join_map.
4741 
4742   @param join_list     List of tables
4743   @param first_unused  Number of first unused bit in nested_join_map before the
4744                        call
4745 
4746   @note
4747     This function is called after simplify_joins(), when there are no
4748     redundant nested joins.
4749     We cannot have more nested joins in a query block than there are tables,
4750     so as long as the number of bits in nested_join_map is not less than the
4751     maximum number of tables in a query block, nested_join_map can never
4752     overflow.
4753 
4754   @return
4755     First unused bit in nested_join_map after the call.
4756 */
4757 
build_bitmap_for_nested_joins(List<TABLE_LIST> * join_list,uint first_unused)4758 uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list,
4759                                    uint first_unused)
4760 {
4761   List_iterator<TABLE_LIST> li(*join_list);
4762   TABLE_LIST *table;
4763   DBUG_ENTER("build_bitmap_for_nested_joins");
4764   while ((table= li++))
4765   {
4766     NESTED_JOIN *nested_join;
4767     if ((nested_join= table->nested_join))
4768     {
4769       // We should have either a join condition or a semi-join condition
4770       assert((table->join_cond() == NULL) == (table->sj_cond() != NULL));
4771 
4772       nested_join->nj_map= 0;
4773       nested_join->nj_total= 0;
4774       /*
4775         We only record nested join information for outer join nests.
4776         Tables belonging in semi-join nests are recorded in the
4777         embedding outer join nest, if one exists.
4778       */
4779       if (table->join_cond())
4780       {
4781         assert(first_unused < sizeof(nested_join_map)*8);
4782         nested_join->nj_map= (nested_join_map) 1 << first_unused++;
4783         nested_join->nj_total= nested_join->join_list.elements;
4784       }
4785       else if (table->sj_cond())
4786       {
4787         NESTED_JOIN *const outer_nest=
4788           table->embedding ? table->embedding->nested_join : NULL;
4789         /*
4790           The semi-join nest has already been counted into the table count
4791           for the outer join nest as one table, so subtract 1 from the
4792           table count.
4793         */
4794         if (outer_nest)
4795           outer_nest->nj_total+= (nested_join->join_list.elements - 1);
4796       }
4797       else
4798         assert(false);
4799 
4800       first_unused= build_bitmap_for_nested_joins(&nested_join->join_list,
4801                                                   first_unused);
4802     }
4803   }
4804   DBUG_RETURN(first_unused);
4805 }
4806 
4807 
4808 /** Update the dependency map for the tables. */
4809 
update_depend_map()4810 void JOIN::update_depend_map()
4811 {
4812   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
4813   for (uint tableno = 0; tableno < tables; tableno++)
4814   {
4815     JOIN_TAB *const tab= best_ref[tableno];
4816     TABLE_REF *const ref= &tab->ref();
4817     table_map depend_map= 0;
4818     Item **item= ref->items;
4819     for (uint i = 0; i < ref->key_parts; i++, item++)
4820       depend_map|= (*item)->used_tables();
4821     depend_map&= ~PSEUDO_TABLE_BITS;
4822     ref->depend_map= depend_map;
4823     for (JOIN_TAB **tab2= map2table; depend_map; tab2++, depend_map >>= 1)
4824     {
4825       if (depend_map & 1)
4826 	ref->depend_map|= (*tab2)->ref().depend_map;
4827     }
4828   }
4829 }
4830 
4831 
4832 /** Update the dependency map for the sort order. */
4833 
update_depend_map(ORDER * order)4834 void JOIN::update_depend_map(ORDER *order)
4835 {
4836   for (; order ; order=order->next)
4837   {
4838     table_map depend_map;
4839     order->item[0]->update_used_tables();
4840     order->depend_map= depend_map=
4841       order->item[0]->used_tables() & ~PARAM_TABLE_BIT;
4842     order->used= 0;
4843     // Not item_sum(), RAND() and no reference to table outside of sub select
4844     if (!(order->depend_map & (OUTER_REF_TABLE_BIT | RAND_TABLE_BIT))
4845         && !order->item[0]->with_sum_func)
4846     {
4847       for (JOIN_TAB **tab= map2table; depend_map; tab++, depend_map >>= 1)
4848       {
4849 	if (depend_map & 1)
4850 	  order->depend_map|=(*tab)->ref().depend_map;
4851       }
4852     }
4853   }
4854 }
4855 
4856 
4857 /**
4858   Update equalities and keyuse references after semi-join materialization
4859   strategy is chosen.
4860 
4861   @details
4862     For each multiple equality that contains a field that is selected
4863     from a subquery, and that subquery is executed using a semi-join
4864     materialization strategy, add the corresponding column in the materialized
4865     temporary table to the equality.
4866     For each injected semi-join equality that is not converted to
4867     multiple equality, replace the reference to the expression selected
4868     from the subquery with the corresponding column in the temporary table.
4869 
4870     This is needed to properly reflect the equalities that involve injected
4871     semi-join equalities when materialization strategy is chosen.
4872     @see eliminate_item_equal() for how these equalities are used to generate
4873     correct equality predicates.
4874 
4875     The MaterializeScan semi-join strategy requires some additional processing:
4876     All primary tables after the materialized temporary table must be inspected
4877     for keyuse objects that point to expressions from the subquery tables.
4878     These references must be replaced with references to corresponding columns
4879     in the materialized temporary table instead. Those primary tables using
4880     ref access will thus be made to depend on the materialized temporary table
4881     instead of the subquery tables.
4882 
4883     Only the injected semi-join equalities need this treatment, other predicates
4884     will be handled correctly by the regular item substitution process.
4885 
4886   @return False if success, true if error
4887 */
4888 
update_equalities_for_sjm()4889 bool JOIN::update_equalities_for_sjm()
4890 {
4891   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
4892   List_iterator<Semijoin_mat_exec> it(sjm_exec_list);
4893   Semijoin_mat_exec *sjm_exec;
4894   while ((sjm_exec= it++))
4895   {
4896     TABLE_LIST *const sj_nest= sjm_exec->sj_nest;
4897 
4898     assert(!sj_nest->outer_join_nest());
4899     /*
4900       A materialized semi-join nest cannot actually be an inner part of an
4901       outer join yet, this is just a preparatory step,
4902       ie sj_nest->outer_join_nest() is always NULL here.
4903       @todo: Enable outer joining here later.
4904     */
4905     Item *cond= sj_nest->outer_join_nest() ?
4906       sj_nest->outer_join_nest()->join_cond_optim() : where_cond;
4907     if (!cond)
4908       continue;
4909 
4910     uchar *dummy= NULL;
4911     cond= cond->compile(&Item::equality_substitution_analyzer, &dummy,
4912                         &Item::equality_substitution_transformer,
4913                         (uchar *)sj_nest);
4914     if (cond == NULL)
4915       return true;
4916 
4917     cond->update_used_tables();
4918 
4919     // Loop over all primary tables that follow the materialized table
4920     for (uint j= sjm_exec->mat_table_index + 1; j < primary_tables; j++)
4921     {
4922       JOIN_TAB *const tab= best_ref[j];
4923       for (Key_use *keyuse= tab->position()->key;
4924            keyuse && keyuse->table_ref == tab->table_ref &&
4925            keyuse->key == tab->position()->key->key;
4926            keyuse++)
4927       {
4928         List_iterator<Item> it(sj_nest->nested_join->sj_inner_exprs);
4929         Item *old;
4930         uint fieldno= 0;
4931         while ((old= it++))
4932         {
4933           if (old->real_item()->eq(keyuse->val->real_item(), false))
4934           {
4935             /*
4936               Replace the expression selected from the subquery with the
4937               corresponding column of the materialized temporary table.
4938             */
4939             keyuse->val= sj_nest->nested_join->sjm.mat_fields[fieldno];
4940             keyuse->used_tables= keyuse->val->used_tables();
4941             break;
4942           }
4943           fieldno++;
4944         }
4945       }
4946     }
4947   }
4948 
4949   return false;
4950 }
4951 
4952 
4953 /**
4954   Assign set of available (prefix) tables to all tables in query block.
4955   Also set added tables, ie the tables added in each JOIN_TAB compared to the
4956   previous JOIN_TAB.
4957   This function must be called for every query block after the table order
4958   has been determined.
4959 */
4960 
set_prefix_tables()4961 void JOIN::set_prefix_tables()
4962 {
4963   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
4964   assert(!plan_is_const());
4965   /*
4966     The const tables are available together with the first non-const table in
4967     the join order.
4968   */
4969   table_map const initial_tables_map= const_table_map |
4970     (allow_outer_refs ? OUTER_REF_TABLE_BIT : 0);
4971 
4972   table_map current_tables_map= initial_tables_map;
4973   table_map prev_tables_map= (table_map) 0;
4974   table_map saved_tables_map= (table_map) 0;
4975 
4976   JOIN_TAB *last_non_sjm_tab= NULL; // Track the last non-sjm table
4977 
4978   for (uint i= const_tables; i < tables; i++)
4979   {
4980     JOIN_TAB *const tab= best_ref[i];
4981     if (!tab->table())
4982       continue;
4983     /*
4984       Tables that are within SJ-Materialization nests cannot have their
4985       conditions referring to preceding non-const tables.
4986        - If we're looking at the first SJM table, reset current_tables_map
4987          to refer to only allowed tables
4988       @see Item_equal::get_subst_item()
4989       @see eliminate_item_equal()
4990     */
4991     if (sj_is_materialize_strategy(tab->get_sj_strategy()))
4992     {
4993       const table_map sjm_inner_tables= tab->emb_sj_nest->sj_inner_tables;
4994       if (!(sjm_inner_tables & current_tables_map))
4995       {
4996         saved_tables_map= current_tables_map;
4997         current_tables_map= initial_tables_map;
4998         prev_tables_map= (table_map) 0;
4999       }
5000 
5001       current_tables_map|= tab->table_ref->map();
5002       tab->set_prefix_tables(current_tables_map, prev_tables_map);
5003       prev_tables_map= current_tables_map;
5004 
5005       if (!(sjm_inner_tables & ~current_tables_map))
5006       {
5007         /*
5008           At the end of a semi-join materialization nest,
5009           add non-deterministic expressions to the last table of the nest:
5010         */
5011         tab->add_prefix_tables(RAND_TABLE_BIT);
5012 
5013         // Restore the previous map:
5014         current_tables_map= saved_tables_map;
5015         prev_tables_map= last_non_sjm_tab ?
5016                          last_non_sjm_tab->prefix_tables() : (table_map) 0;
5017       }
5018     }
5019     else
5020     {
5021       last_non_sjm_tab= tab;
5022       current_tables_map|= tab->table_ref->map();
5023       tab->set_prefix_tables(current_tables_map, prev_tables_map);
5024       prev_tables_map= current_tables_map;
5025     }
5026   }
5027   /*
5028     Non-deterministic expressions must be added to the last table's condition.
5029     It solves problem with queries like SELECT * FROM t1 WHERE rand() > 0.5
5030   */
5031   if (last_non_sjm_tab != NULL)
5032     last_non_sjm_tab->add_prefix_tables(RAND_TABLE_BIT);
5033 }
5034 
5035 
5036 /**
5037   Calculate best possible join order and initialize the join structure.
5038 
5039   @return true if success, false if error.
5040 
5041   The JOIN object is populated with statistics about the query,
5042   and a plan with table order and access method selection is made.
5043 
5044   The list of tables to be optimized is taken from select_lex->leaf_tables.
5045   JOIN::where_cond is also used in the optimization.
5046   As a side-effect, JOIN::keyuse_array is populated with key_use information.
5047 
5048   Here is an overview of the logic of this function:
5049 
5050   - Initialize JOIN data structures and setup basic dependencies between tables.
5051 
5052   - Update dependencies based on join information.
5053 
5054   - Make key descriptions (update_ref_and_keys()).
5055 
5056   - Pull out semi-join tables based on table dependencies.
5057 
5058   - Extract tables with zero or one rows as const tables.
5059 
5060   - Read contents of const tables, substitute columns from these tables with
5061     actual data. Also keep track of empty tables vs. one-row tables.
5062 
5063   - After const table extraction based on row count, more tables may
5064     have become functionally dependent. Extract these as const tables.
5065 
5066   - Add new sargable predicates based on retrieved const values.
5067 
5068   - Calculate number of rows to be retrieved from each table.
5069 
5070   - Calculate cost of potential semi-join materializations.
5071 
5072   - Calculate best possible join order based on available statistics.
5073 
5074   - Fill in remaining information for the generated join order.
5075 */
5076 
make_join_plan()5077 bool JOIN::make_join_plan()
5078 {
5079   DBUG_ENTER("JOIN::make_join_plan");
5080 
5081   SARGABLE_PARAM *sargables= NULL;
5082 
5083   Opt_trace_context * const trace= &thd->opt_trace;
5084 
5085   if (init_planner_arrays())           // Create and initialize the arrays
5086     DBUG_RETURN(true);
5087 
5088   // Outer join dependencies were initialized above, now complete the analysis.
5089   if (select_lex->outer_join)
5090     propagate_dependencies();
5091 
5092   if (unlikely(trace->is_started()))
5093     trace_table_dependencies(trace, join_tab, primary_tables);
5094 
5095   // Build the key access information, which is the basis for ref access.
5096   if (where_cond || select_lex->outer_join)
5097   {
5098     if (update_ref_and_keys(thd, &keyuse_array, join_tab, tables, where_cond,
5099                             cond_equal, ~select_lex->outer_join, select_lex,
5100                             &sargables))
5101       DBUG_RETURN(true);
5102   }
5103 
5104   /*
5105     Pull out semi-join tables based on dependencies. Dependencies are valid
5106     throughout the lifetime of a query, so this operation can be performed
5107     on the first optimization only.
5108   */
5109   if (!select_lex->sj_pullout_done && select_lex->sj_nests.elements &&
5110       pull_out_semijoin_tables(this))
5111     DBUG_RETURN(true);
5112 
5113   select_lex->sj_pullout_done= true;
5114   const uint sj_nests= select_lex->sj_nests.elements; // Changed by pull-out
5115 
5116   if (!(select_lex->active_options() & OPTION_NO_CONST_TABLES))
5117   {
5118     // Detect tables that are const (0 or 1 row) and read their contents.
5119     if (extract_const_tables())
5120       DBUG_RETURN(true);
5121 
5122     // Detect tables that are functionally dependent on const values.
5123     if (extract_func_dependent_tables())
5124       DBUG_RETURN(true);
5125   }
5126   // Possibly able to create more sargable predicates from const rows.
5127   if (const_tables && sargables)
5128     update_sargable_from_const(sargables);
5129 
5130   // Make a first estimate of the fanout for each table in the query block.
5131   if (estimate_rowcount())
5132     DBUG_RETURN(true);
5133 
5134   if (sj_nests)
5135   {
5136     set_semijoin_embedding();
5137     select_lex->update_semijoin_strategies(thd);
5138   }
5139 
5140   if (!plan_is_const())
5141     optimize_keyuse();
5142 
5143   allow_outer_refs= true;
5144 
5145   if (sj_nests && optimize_semijoin_nests_for_materialization(this))
5146     DBUG_RETURN(true);
5147 
5148   // Choose the table order based on analysis done so far.
5149   if (Optimize_table_order(thd, this, NULL).choose_table_order())
5150     DBUG_RETURN(true);
5151 
5152   DBUG_EXECUTE_IF("bug13820776_1", thd->killed= THD::KILL_QUERY;);
5153   if (thd->killed || thd->is_error())
5154     DBUG_RETURN(true);
5155 
5156   // If this is a subquery, decide between In-to-exists and materialization
5157   if (unit->item && decide_subquery_strategy())
5158     DBUG_RETURN(true);
5159 
5160   refine_best_rowcount();
5161 
5162   if (!(thd->variables.option_bits & OPTION_BIG_SELECTS) &&
5163       best_read > (double) thd->variables.max_join_size &&
5164       !thd->lex->is_explain())
5165   {						/* purecov: inspected */
5166     my_message(ER_TOO_BIG_SELECT, ER(ER_TOO_BIG_SELECT), MYF(0));
5167     error= -1;
5168     DBUG_RETURN(1);
5169   }
5170 
5171   positions= NULL;  // But keep best_positions for get_best_combination
5172 
5173   /*
5174     Store the cost of this query into a user variable
5175     Don't update m_current_query_cost for statements that are not "flat joins" :
5176     i.e. they have subqueries, unions or call stored procedures.
5177     TODO: calculate a correct cost for a query with subqueries and UNIONs.
5178   */
5179   if (thd->lex->is_single_level_stmt())
5180     thd->m_current_query_cost= best_read;
5181 
5182   // Generate an execution plan from the found optimal join order.
5183   if (get_best_combination())
5184     DBUG_RETURN(true);
5185 
5186   // Cleanup after update_ref_and_keys has added keys for derived tables.
5187   if (select_lex->materialized_derived_table_count)
5188     drop_unused_derived_keys();
5189 
5190   // No need for this struct after new JOIN_TAB array is set up.
5191   best_positions= NULL;
5192 
5193   // Some called function may still set error status unnoticed
5194   if (thd->is_error())
5195     DBUG_RETURN(true);
5196 
5197   // There is at least one empty const table
5198   if (const_table_map != found_const_table_map)
5199     zero_result_cause= "no matching row in const table";
5200 
5201   DBUG_RETURN(false);
5202 }
5203 
5204 
5205 /**
5206   Initialize scratch arrays for the join order optimization
5207 
5208   @returns false if success, true if error
5209 
5210   @note If something fails during initialization, JOIN::cleanup()
5211         will free anything that has been partially allocated and set up.
5212         Arrays are created in the execution mem_root, so they will be
5213         deleted automatically when the mem_root is re-initialized.
5214 */
5215 
init_planner_arrays()5216 bool JOIN::init_planner_arrays()
5217 {
5218   // Up to one extra slot per semi-join nest is needed (if materialized)
5219   const uint sj_nests= select_lex->sj_nests.elements;
5220   const uint table_count= select_lex->leaf_table_count;
5221 
5222   assert(primary_tables == 0 && tables == 0);
5223 
5224   if (!(join_tab= alloc_jtab_array(thd, table_count)))
5225     return true;
5226 
5227   /*
5228     We add 2 cells:
5229     - because planning stage uses 0-termination so needs +1
5230     - because after get_best_combination, we don't use 0-termination but
5231     need +2, to host at most 2 tmp sort/group/distinct tables.
5232   */
5233   if (!(best_ref= (JOIN_TAB **) thd->alloc(sizeof(JOIN_TAB *) *
5234                                            (table_count + sj_nests + 2))))
5235     return true;
5236 
5237   // sort/group tmp tables have no map
5238   if (!(map2table= (JOIN_TAB **) thd->alloc(sizeof(JOIN_TAB *) *
5239                                            (table_count + sj_nests))))
5240     return true;
5241 
5242   if (!(positions= new (thd->mem_root) POSITION[table_count]))
5243     return true;
5244 
5245   if (!(best_positions= new (thd->mem_root) POSITION[table_count+sj_nests]))
5246     return true;
5247 
5248   /*
5249     Initialize data structures for tables to be joined.
5250     Initialize dependencies between tables.
5251   */
5252   JOIN_TAB **best_ref_p= best_ref;
5253   TABLE_LIST *tl= select_lex->leaf_tables;
5254 
5255   for (JOIN_TAB *tab= join_tab;
5256        tl;
5257        tab++, tl= tl->next_leaf, best_ref_p++)
5258   {
5259     *best_ref_p= tab;
5260     TABLE *const table= tl->table;
5261     tab->table_ref= tl;
5262     tab->set_table(table);
5263     const int err= tl->fetch_number_of_rows();
5264 
5265     // Initialize the cost model for the table
5266     table->init_cost_model(cost_model());
5267 
5268     DBUG_EXECUTE_IF("bug11747970_raise_error",
5269                     {
5270                       if (!err)
5271                       {
5272                         my_error(ER_UNKNOWN_ERROR, MYF(0));
5273                         return true;
5274                       }
5275                     });
5276 
5277     if (err)
5278     {
5279       table->file->print_error(err, MYF(0));
5280       return true;
5281     }
5282     table->quick_keys.clear_all();
5283     table->possible_quick_keys.clear_all();
5284     table->reginfo.not_exists_optimize= false;
5285     memset(table->const_key_parts, 0, sizeof(key_part_map)*table->s->keys);
5286     all_table_map|= tl->map();
5287     tab->set_join(this);
5288 
5289     tab->dependent= tl->dep_tables;  // Initialize table dependencies
5290     if (tl->schema_table)
5291       table->file->stats.records= 2;
5292     table->quick_condition_rows= table->file->stats.records;
5293 
5294     tab->init_join_cond_ref(tl);
5295 
5296     if (tl->outer_join_nest())
5297     {
5298       // tab belongs to a nested join, maybe to several embedding joins
5299       tab->embedding_map= 0;
5300       for (TABLE_LIST *embedding= tl->embedding;
5301            embedding;
5302            embedding= embedding->embedding)
5303       {
5304         NESTED_JOIN *const nested_join= embedding->nested_join;
5305         tab->embedding_map|= nested_join->nj_map;
5306         tab->dependent|= embedding->dep_tables;
5307       }
5308     }
5309     else if (tab->join_cond())
5310     {
5311       // tab is the only inner table of an outer join
5312       tab->embedding_map= 0;
5313       for (TABLE_LIST *embedding= tl->embedding;
5314            embedding;
5315            embedding= embedding->embedding)
5316         tab->embedding_map|= embedding->nested_join->nj_map;
5317     }
5318     tables++;                     // Count number of initialized tables
5319   }
5320 
5321   primary_tables= tables;
5322   *best_ref_p= NULL;              // Last element of array must be NULL
5323 
5324   return false;
5325 }
5326 
5327 
5328 /**
5329   Propagate dependencies between tables due to outer join relations.
5330 
5331   @returns false if success, true if error
5332 
5333   Build transitive closure for relation 'to be dependent on'.
5334   This will speed up the plan search for many cases with outer joins,
5335   as well as allow us to catch illegal cross references.
5336   Warshall's algorithm is used to build the transitive closure.
5337   As we may restart the outer loop upto 'table_count' times, the
5338   complexity of the algorithm is O((number of tables)^3).
5339   However, most of the iterations will be shortcircuited when
5340   there are no dependencies to propagate.
5341 */
5342 
propagate_dependencies()5343 bool JOIN::propagate_dependencies()
5344 {
5345   for (uint i= 0; i < tables; i++)
5346   {
5347     if (!join_tab[i].dependent)
5348       continue;
5349 
5350     // Add my dependencies to other tables depending on me
5351     uint j;
5352     JOIN_TAB *tab;
5353     for (j= 0, tab= join_tab; j < tables; j++, tab++)
5354     {
5355       if (tab->dependent & join_tab[i].table_ref->map())
5356       {
5357         const table_map was_dependent= tab->dependent;
5358         tab->dependent|= join_tab[i].dependent;
5359         /*
5360           If we change dependencies for a table we already have
5361           processed: Redo dependency propagation from this table.
5362         */
5363         if (i > j && tab->dependent != was_dependent)
5364         {
5365           i= j-1;
5366           break;
5367         }
5368       }
5369     }
5370   }
5371 
5372   JOIN_TAB *const tab_end= join_tab + tables;
5373   for (JOIN_TAB *tab= join_tab; tab < tab_end; tab++)
5374   {
5375     /*
5376       Catch illegal cross references for outer joins.
5377       This could happen before WL#2486 was implemented in 5.0, but should no
5378       longer be possible.
5379       Thus, an assert has been added should this happen again.
5380       @todo Remove the error check below.
5381     */
5382     assert(!(tab->dependent & tab->table_ref->map()));
5383 
5384     if (tab->dependent & tab->table_ref->map())
5385     {
5386       tables= 0;               // Don't use join->table
5387       primary_tables= 0;
5388       my_message(ER_WRONG_OUTER_JOIN, ER(ER_WRONG_OUTER_JOIN), MYF(0));
5389       return true;
5390     }
5391 
5392     tab->key_dependent= tab->dependent;
5393   }
5394 
5395   return false;
5396 }
5397 
5398 
5399 /**
5400   Extract const tables based on row counts.
5401 
5402   @returns false if success, true if error
5403 
5404   This extraction must be done for each execution.
5405   Tables containing exactly zero or one rows are marked as const, but
5406   notice the additional constraints checked below.
5407   Tables that are extracted have their rows read before actual execution
5408   starts and are placed in the beginning of the join_tab array.
5409   Thus, they do not take part in join order optimization process,
5410   which can significantly reduce the optimization time.
5411   The data read from these tables can also be regarded as "constant"
5412   throughout query execution, hence the column values can be used for
5413   additional constant propagation and extraction of const tables based
5414   on eq-ref properties.
5415 
5416   The tables are given the type JT_SYSTEM.
5417 */
5418 
extract_const_tables()5419 bool JOIN::extract_const_tables()
5420 {
5421   enum enum_const_table_extraction
5422   {
5423      extract_no_table=    0,
5424      extract_empty_table= 1,
5425      extract_const_table= 2
5426   };
5427 
5428   JOIN_TAB *const tab_end= join_tab + tables;
5429   for (JOIN_TAB *tab= join_tab; tab < tab_end; tab++)
5430   {
5431     TABLE      *const table= tab->table();
5432     TABLE_LIST *const tl= tab->table_ref;
5433     enum enum_const_table_extraction extract_method= extract_const_table;
5434 
5435     const bool all_partitions_pruned_away= table->all_partitions_pruned_away;
5436 
5437     if (tl->outer_join_nest())
5438     {
5439       /*
5440         Table belongs to a nested join, no candidate for const table extraction.
5441       */
5442       extract_method= extract_no_table;
5443     }
5444     else if (tl->embedding && tl->embedding->sj_cond())
5445     {
5446       /*
5447         Table belongs to a semi-join.
5448         We do not currently pull out const tables from semi-join nests.
5449       */
5450       extract_method= extract_no_table;
5451     }
5452     else if (tab->join_cond())
5453     {
5454       // tab is the only inner table of an outer join, extract empty tables
5455       extract_method= extract_empty_table;
5456     }
5457     switch (extract_method)
5458     {
5459     case extract_no_table:
5460       break;
5461 
5462     case extract_empty_table:
5463       // Extract tables with zero rows, but only if statistics are exact
5464       if ((table->file->stats.records == 0 ||
5465            all_partitions_pruned_away) &&
5466           (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT))
5467         mark_const_table(tab, NULL);
5468       break;
5469 
5470     case extract_const_table:
5471       /*
5472         Extract tables with zero or one rows, but do not extract tables that
5473          1. are dependent upon other tables, or
5474          2. have no exact statistics, or
5475          3. are full-text searched
5476       */
5477       if ((table->s->system ||
5478            table->file->stats.records <= 1 ||
5479            all_partitions_pruned_away) &&
5480           !tab->dependent &&                                             // 1
5481           (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) && // 2
5482           !table->fulltext_searched)                                     // 3
5483         mark_const_table(tab, NULL);
5484       break;
5485     }
5486   }
5487 
5488   // Read const tables (tables matching no more than 1 rows)
5489   if (!const_tables)
5490     return false;
5491 
5492   for (POSITION *p_pos= positions, *p_end= p_pos + const_tables;
5493        p_pos < p_end;
5494        p_pos++)
5495   {
5496     JOIN_TAB *const tab= p_pos->table;
5497     const int status= join_read_const_table(tab, p_pos);
5498     if (status > 0)
5499       return true;
5500     else if (status == 0)
5501     {
5502       found_const_table_map|= tab->table_ref->map();
5503       tab->table_ref->optimized_away= true;
5504     }
5505   }
5506 
5507   return false;
5508 }
5509 
/**
  Extract const tables based on functional dependencies.

  @returns false if success, true if error

  This extraction must be done for each execution.

  Mark as const the tables that
   - are functionally dependent on constant values, or
   - are inner tables of an outer join and contain exactly zero or one rows

  Tables that are extracted have their rows read before actual execution
  starts and are placed in the beginning of the join_tab array, just as
  described for JOIN::extract_const_tables().

  The tables are given the type JT_CONST.

  The scan over the non-const tables is repeated as long as a pass both
  marked a table as const through a key lookup (ref_changed) and left a
  key with the HA_NOSAME flag whose remaining references (found_ref)
  include a table that is in const_table_map.
*/

bool JOIN::extract_func_dependent_tables()
{
  // loop until no more const tables are found
  bool ref_changed;
  table_map found_ref;
  do
  {
    /*
      Jump target: the scan is restarted from here, bypassing the loop
      condition, after a table has been null-complemented below.
    */
  more_const_tables_found:
    ref_changed = false;
    found_ref= 0;

    // Loop over all tables that are not already determined to be const
    for (JOIN_TAB **pos= best_ref + const_tables; *pos; pos++)
    {
      JOIN_TAB *const tab= *pos;
      TABLE *const table= tab->table();
      TABLE_LIST *const tl= tab->table_ref;
      /*
        If equi-join condition by a key is null rejecting and after a
        substitution of a const table the key value happens to be null
        then we can state that there are no matches for this equi-join.
      */
      Key_use *keyuse= tab->keyuse();
      if (keyuse && tab->join_cond() && !tab->embedding_map)
      {
        /*
          When performing an outer join operation if there are no matching rows
          for the single row of the outer table all the inner tables are to be
          null complemented and thus considered as constant tables.
          Here we apply this consideration to the case of outer join operations
          with a single inner table only because the case with nested tables
          would require a more thorough analysis.
          TODO. Apply single row substitution to null complemented inner tables
          for nested outer join operations.
	*/
        while (keyuse->table_ref == tl)
        {
          // Key value depends on const tables only, is NULL and rejects NULL
          if (!(keyuse->val->used_tables() & ~const_table_map) &&
              keyuse->val->is_null() && keyuse->null_rejecting)
          {
            table->set_null_row();
            found_const_table_map|= tl->map();
            mark_const_table(tab, keyuse);
            goto more_const_tables_found;
           }
	  keyuse++;
        }
      }

      if (tab->dependent)              // If dependent on some table
      {
        // All dependent tables must be const
        if (tab->dependent & ~const_table_map)
          continue;
        /*
          Mark a dependent table as constant if
           1. it has exactly zero or one rows (it is a system table), and
           2. it is not within a nested outer join, and
           3. it does not have an expensive outer join condition.
              This is because we have to determine whether an outer-joined table
              has a real row or a null-extended row in the optimizer phase.
              We have no possibility to evaluate its join condition at
              execution time, when it is marked as a system table.
        */
	if (table->file->stats.records <= 1L &&                            // 1
            (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) && // 1
            !tl->outer_join_nest() &&                                      // 2
            !(tab->join_cond() && tab->join_cond()->is_expensive()))   // 3
	{                              // system table
          mark_const_table(tab, NULL);
          // Positive status is an error; zero adds the table to the found map
          const int status=
            join_read_const_table(tab, positions + const_tables - 1);
          if (status > 0)
            return true;
          else if (status == 0)
            found_const_table_map|= tl->map();
          continue;
        }
      }

      // Check if table can be read by key or table only uses const refs

      if ((keyuse= tab->keyuse()))
      {
        // Outer loop: one iteration per key that has Key_use entries for tl
        while (keyuse->table_ref == tl)
        {
          Key_use *const start_keyuse= keyuse;
          const uint key= keyuse->key;
          tab->keys().set_bit(key);               // QQ: remove this ?

          /*
            Inner loop: examine all Key_use entries of this key.
              eq_part   - key parts that have a usable equality
              const_ref - key parts whose value only uses tables in
                          found_const_table_map
              refs      - tables used by the other key part values
          */
          table_map refs= 0;
          key_map const_ref, eq_part;
          do
          {
            if (keyuse->val->type() != Item::NULL_ITEM && !keyuse->optimize)
            {
              if (!((~found_const_table_map) & keyuse->used_tables))
                const_ref.set_bit(keyuse->keypart);
              else
                refs|= keyuse->used_tables;
              eq_part.set_bit(keyuse->keypart);
            }
            keyuse++;
          } while (keyuse->table_ref == tl && keyuse->key == key);

          /*
            Extract const tables with proper key dependencies.
            Exclude tables that
             1. are full-text searched, or
             2. are part of nested outer join, or
             3. are part of semi-join, or
             4. have an expensive outer join condition.
             5. are blocked by handler for const table optimize.
          */
          if (eq_part.is_prefix(table->key_info[key].user_defined_key_parts) &&
              !table->fulltext_searched &&                           // 1
              !tl->outer_join_nest() &&                              // 2
              !(tl->embedding && tl->embedding->sj_cond()) &&        // 3
              !(tab->join_cond() && tab->join_cond()->is_expensive()) &&// 4
              !(table->file->ha_table_flags() & HA_BLOCK_CONST_TABLE))  // 5
          {
            if (table->key_info[key].flags & HA_NOSAME)
            {
              if (const_ref == eq_part)
              {                        // Found everything for ref.
                ref_changed = true;
                mark_const_table(tab, start_keyuse);
                if (create_ref_for_key(this, tab, start_keyuse,
                                       found_const_table_map))
                  return true;
                const int status=
                  join_read_const_table(tab, positions + const_tables - 1);
                if (status > 0)
                  return true;
                else if (status == 0)
                  found_const_table_map|= tl->map();
                // Table is const now; skip its remaining keys
                break;
              }
              else
                found_ref|= refs;       // Table is const if all refs are const
            }
            // Key without HA_NOSAME and all equality parts const: note the key
            else if (const_ref == eq_part)
              tab->const_keys.set_bit(key);
          }
	}
      }
    }
  } while ((const_table_map & found_ref) && ref_changed);

  return false;
}
5679 
5680 /**
5681   Update info on indexes that can be used for search lookups as
5682   reading const tables may have added new sargable predicates.
5683 */
5684 
update_sargable_from_const(SARGABLE_PARAM * sargables)5685 void JOIN::update_sargable_from_const(SARGABLE_PARAM *sargables)
5686 {
5687   for ( ; sargables->field; sargables++)
5688   {
5689     Field *const field= sargables->field;
5690     JOIN_TAB *const tab= field->table->reginfo.join_tab;
5691     key_map possible_keys= field->key_start;
5692     possible_keys.intersect(field->table->keys_in_use_for_query);
5693     bool is_const= true;
5694     for (uint j= 0; j < sargables->num_values; j++)
5695       is_const&= sargables->arg_value[j]->const_item();
5696     if (is_const)
5697     {
5698       tab->const_keys.merge(possible_keys);
5699       tab->keys().merge(possible_keys);
5700     }
5701   }
5702 }
5703 
5704 
5705 /**
5706   Estimate the number of matched rows for each joined table.
5707   Set up range scan for tables that have proper predicates.
5708 
5709   @returns false if success, true if error
5710 */
5711 
estimate_rowcount()5712 bool JOIN::estimate_rowcount()
5713 {
5714   Opt_trace_context *const trace= &thd->opt_trace;
5715   Opt_trace_object trace_wrapper(trace);
5716   Opt_trace_array trace_records(trace, "rows_estimation");
5717 
5718   JOIN_TAB *const tab_end= join_tab + tables;
5719   for (JOIN_TAB *tab= join_tab; tab < tab_end; tab++)
5720   {
5721     const Cost_model_table *const cost_model= tab->table()->cost_model();
5722     Opt_trace_object trace_table(trace);
5723     trace_table.add_utf8_table(tab->table_ref);
5724     if (tab->type() == JT_SYSTEM || tab->type() == JT_CONST)
5725     {
5726       trace_table.add("rows", 1).add("cost", 1)
5727         .add_alnum("table_type", (tab->type() == JT_SYSTEM) ? "system": "const")
5728         .add("empty", tab->table()->has_null_row());
5729 
5730       // Only one matching row and one block to read
5731       tab->set_records(tab->found_records= 1);
5732       tab->worst_seeks= cost_model->page_read_cost(1.0);
5733       tab->read_time= static_cast<ha_rows>(tab->worst_seeks);
5734       continue;
5735     }
5736     // Approximate number of found rows and cost to read them
5737     tab->set_records(tab->found_records= tab->table()->file->stats.records);
5738     const Cost_estimate table_scan_time= tab->table()->file->table_scan_cost();
5739     tab->read_time= static_cast<ha_rows>(table_scan_time.total_cost());
5740 
5741     /*
5742       Set a max value for the cost of seek operations we can expect
5743       when using key lookup. This can't be too high as otherwise we
5744       are likely to use table scan.
5745     */
5746     tab->worst_seeks=
5747       min(cost_model->page_read_cost((double) tab->found_records / 10),
5748           (double) tab->read_time * 3);
5749     const double min_worst_seek= cost_model->page_read_cost(2.0);
5750     if (tab->worst_seeks < min_worst_seek)      // Fix for small tables
5751       tab->worst_seeks= min_worst_seek;
5752 
5753     /*
5754       Add to tab->const_keys those indexes for which all group fields or
5755       all select distinct fields participate in one index.
5756     */
5757     add_group_and_distinct_keys(this, tab);
5758 
5759     /*
5760       Perform range analysis if there are keys it could use (1).
5761       Don't do range analysis if on the inner side of an outer join (2).
5762       Do range analysis if on the inner side of a semi-join (3).
5763     */
5764     TABLE_LIST *const tl= tab->table_ref;
5765     if (!tab->const_keys.is_clear_all() &&                        // (1)
5766         (!tl->embedding ||                                        // (2)
5767          (tl->embedding && tl->embedding->sj_cond())))            // (3)
5768     {
5769       /*
5770         This call fills tab->quick() with the best QUICK access method
5771         possible for this table, and only if it's better than table scan.
5772         It also fills tab->needed_reg.
5773       */
5774       ha_rows records= get_quick_record_count(thd, tab, row_limit);
5775 
5776       if (records == 0 && thd->is_error())
5777         return true;
5778 
5779       /*
5780         Check for "impossible range", but make sure that we do not attempt
5781         to mark semi-joined tables as "const" (only semi-joined tables that
5782         are functionally dependent can be marked "const", and subsequently
5783         pulled out of their semi-join nests).
5784       */
5785       if (records == 0 &&
5786           tab->table()->reginfo.impossible_range &&
5787           (!(tl->embedding && tl->embedding->sj_cond())))
5788       {
5789         /*
5790           Impossible WHERE condition or join condition
5791           In case of join cond, mark that one empty NULL row is matched.
5792           In case of WHERE, don't set found_const_table_map to get the
5793           caller to abort with a zero row result.
5794         */
5795         mark_const_table(tab, NULL);
5796         tab->set_type(JT_CONST);  // Override setting made in mark_const_table()
5797         if (tab->join_cond())
5798         {
5799           // Generate an empty row
5800           trace_table.add("returning_empty_null_row", true).
5801             add_alnum("cause", "impossible_on_condition");
5802           found_const_table_map|= tl->map();
5803           tab->table()->set_null_row();  // All fields are NULL
5804         }
5805         else
5806         {
5807           trace_table.add("rows", 0).
5808             add_alnum("cause", "impossible_where_condition");
5809         }
5810       }
5811       if (records != HA_POS_ERROR)
5812       {
5813         tab->found_records= records;
5814         tab->read_time= (ha_rows) (tab->quick() ?
5815                                    tab->quick()->cost_est.total_cost() : 0.0);
5816       }
5817     }
5818     else
5819     {
5820       Opt_trace_object(trace, "table_scan").
5821         add("rows", tab->found_records).
5822         add("cost", tab->read_time);
5823     }
5824   }
5825 
5826   return false;
5827 }
5828 
5829 
5830 /**
5831   Set semi-join embedding join nest pointers.
5832 
5833   Set pointer to embedding semi-join nest for all semi-joined tables.
5834   Note that this must be done for every table inside all semi-join nests,
5835   even for tables within outer join nests embedded in semi-join nests.
5836   A table can never be part of multiple semi-join nests, hence no
5837   ambiguities can ever occur.
5838   Note also that the pointer is not set for TABLE_LIST objects that
5839   are outer join nests within semi-join nests.
5840 */
5841 
set_semijoin_embedding()5842 void JOIN::set_semijoin_embedding()
5843 {
5844   assert(!select_lex->sj_nests.is_empty());
5845 
5846   JOIN_TAB *const tab_end= join_tab + primary_tables;
5847 
5848   for (JOIN_TAB *tab= join_tab; tab < tab_end; tab++)
5849   {
5850     for (TABLE_LIST *tl= tab->table_ref; tl->embedding; tl= tl->embedding)
5851     {
5852       if (tl->embedding->sj_cond())
5853       {
5854         tab->emb_sj_nest= tl->embedding;
5855         break;
5856       }
5857     }
5858   }
5859 }
5860 
5861 
5862 /**
5863   @brief Check if semijoin's compared types allow materialization.
5864 
5865   @param[inout] sj_nest Semi-join nest containing information about correlated
5866          expressions. Set nested_join->sjm.scan_allowed to TRUE if
5867          MaterializeScan strategy allowed. Set nested_join->sjm.lookup_allowed
5868          to TRUE if MaterializeLookup strategy allowed
5869 
5870   @details
5871     This is a temporary fix for BUG#36752.
5872 
5873     There are two subquery materialization strategies for semijoin:
5874 
5875     1. Materialize and do index lookups in the materialized table. See
5876        BUG#36752 for description of restrictions we need to put on the
5877        compared expressions.
5878 
5879        In addition, since indexes are not supported for BLOB columns,
5880        this strategy can not be used if any of the columns in the
5881        materialized table will be BLOB/GEOMETRY columns.  (Note that
5882        also columns for non-BLOB values that may be greater in size
5883        than CONVERT_IF_BIGGER_TO_BLOB, will be represented as BLOB
5884        columns.)
5885 
5886     2. Materialize and then do a full scan of the materialized table.
5887        The same criteria as for MaterializeLookup are applied, except that
5888        BLOB/GEOMETRY columns are allowed.
5889 */
5890 
5891 static
semijoin_types_allow_materialization(TABLE_LIST * sj_nest)5892 void semijoin_types_allow_materialization(TABLE_LIST *sj_nest)
5893 {
5894   DBUG_ENTER("semijoin_types_allow_materialization");
5895 
5896   assert(sj_nest->nested_join->sj_outer_exprs.elements ==
5897          sj_nest->nested_join->sj_inner_exprs.elements);
5898 
5899   if (sj_nest->nested_join->sj_outer_exprs.elements > MAX_REF_PARTS)
5900   {
5901     sj_nest->nested_join->sjm.scan_allowed= false;
5902     sj_nest->nested_join->sjm.lookup_allowed= false;
5903     DBUG_VOID_RETURN;
5904   }
5905 
5906   List_iterator<Item> it1(sj_nest->nested_join->sj_outer_exprs);
5907   List_iterator<Item> it2(sj_nest->nested_join->sj_inner_exprs);
5908 
5909   sj_nest->nested_join->sjm.scan_allowed= true;
5910   sj_nest->nested_join->sjm.lookup_allowed= true;
5911 
5912   bool blobs_involved= false;
5913   Item *outer, *inner;
5914   uint total_lookup_index_length= 0;
5915   uint max_key_length;
5916   uint max_key_part_length;
5917   /*
5918     Maximum lengths for keys and key parts that are supported by
5919     the temporary table storage engine(s).
5920   */
5921   get_max_key_and_part_length(&max_key_length,
5922                               &max_key_part_length);
5923   while (outer= it1++, inner= it2++)
5924   {
5925     assert(outer->real_item() && inner->real_item());
5926     if (!types_allow_materialization(outer, inner))
5927     {
5928       sj_nest->nested_join->sjm.scan_allowed= false;
5929       sj_nest->nested_join->sjm.lookup_allowed= false;
5930       DBUG_VOID_RETURN;
5931     }
5932     blobs_involved|= inner->is_blob_field();
5933 
5934     // Calculate the index length of materialized table
5935     const uint lookup_index_length= get_key_length_tmp_table(inner);
5936     if (lookup_index_length > max_key_part_length)
5937       sj_nest->nested_join->sjm.lookup_allowed= false;
5938     total_lookup_index_length+= lookup_index_length ;
5939   }
5940   if (total_lookup_index_length > max_key_length)
5941     sj_nest->nested_join->sjm.lookup_allowed= false;
5942 
5943   if (blobs_involved)
5944     sj_nest->nested_join->sjm.lookup_allowed= false;
5945 
5946   if (sj_nest->embedding)
5947   {
5948     assert(sj_nest->embedding->join_cond_optim());
5949     /*
5950       There are two issues that prevent materialization strategy from being
5951       used when a semi-join nest is on the inner side of an outer join:
5952       1. If the semi-join contains dependencies to outer tables,
5953          materialize-scan strategy cannot be used.
5954       2. Make sure that executor is able to evaluate triggered conditions
5955          for semi-join materialized tables. It should be correct, but needs
5956          verification.
5957          TODO: Remove this limitation!
5958       Handle this by disabling materialization strategies:
5959     */
5960     sj_nest->nested_join->sjm.scan_allowed= false;
5961     sj_nest->nested_join->sjm.lookup_allowed= false;
5962     DBUG_VOID_RETURN;
5963   }
5964 
5965   DBUG_PRINT("info",("semijoin_types_allow_materialization: ok, allowed"));
5966 
5967   DBUG_VOID_RETURN;
5968 }
5969 
5970 
5971 /*****************************************************************************
5972   Create JOIN_TABS, make a guess about the table types,
5973   Approximate how many records will be used in each table
5974 *****************************************************************************/
5975 
5976 /**
5977   Returns estimated number of rows that could be fetched by given
5978   access method.
5979 
5980   The function calls the range optimizer to estimate the cost of the
5981   cheapest QUICK_* index access method to scan one or several of the
5982   'keys' using the conditions 'select->cond'. The range optimizer
5983   compares several different types of 'quick select' methods (range
5984   scan, index merge, loose index scan) and selects the cheapest one.
5985 
5986   If the best index access method is cheaper than a table- and an index
5987   scan, then the range optimizer also constructs the corresponding
5988   QUICK_* object and assigns it to select->quick. In most cases this
5989   is the QUICK_* object used at later (optimization and execution)
5990   phases.
5991 
5992   @param thd    Session that runs the query.
5993   @param tab    JOIN_TAB of source table.
5994   @param limit  maximum number of rows to select.
5995 
5996   @note
5997     In case of valid range, a QUICK_SELECT_I object will be constructed and
5998     saved in select->quick.
5999 
6000   @return Estimated number of result rows selected from 'tab'.
6001 
6002   @retval HA_POS_ERROR For derived tables/views or if an error occur.
6003   @retval 0            If impossible query (i.e. certainly no rows will be
6004                        selected.)
6005 */
get_quick_record_count(THD * thd,JOIN_TAB * tab,ha_rows limit)6006 static ha_rows get_quick_record_count(THD *thd, JOIN_TAB *tab, ha_rows limit)
6007 {
6008   DBUG_ENTER("get_quick_record_count");
6009   uchar buff[STACK_BUFF_ALLOC];
6010   if (check_stack_overrun(thd, STACK_MIN_SIZE, buff))
6011     DBUG_RETURN(0);                           // Fatal error flag is set
6012 
6013   TABLE_LIST *const tl= tab->table_ref;
6014 
6015   // Derived tables aren't filled yet, so no stats are available.
6016   if (!tl->uses_materialization())
6017   {
6018     QUICK_SELECT_I *qck;
6019     int error= test_quick_select(thd,
6020                                  tab->const_keys,
6021                                  0,      //empty table_map
6022                                  limit,
6023                                  false,  //don't force quick range
6024                                  ORDER::ORDER_NOT_RELEVANT, tab,
6025                                  tab->join_cond() ? tab->join_cond() :
6026                                  tab->join()->where_cond,
6027                                  &tab->needed_reg, &qck, tab->table()->force_index);
6028     tab->set_quick(qck);
6029 
6030     if (error == 1)
6031       DBUG_RETURN(qck->records);
6032     if (error == -1)
6033     {
6034       tl->table->reginfo.impossible_range=1;
6035       DBUG_RETURN(0);
6036     }
6037     DBUG_PRINT("warning",("Couldn't use record count on const keypart"));
6038   }
6039   else if (tl->materializable_is_const())
6040   {
6041     DBUG_RETURN(tl->derived_unit()->query_result()->estimated_rowcount);
6042   }
6043   DBUG_RETURN(HA_POS_ERROR);
6044 }
6045 
6046 /*
6047   Get estimated record length for semi-join materialization temptable
6048 
6049   SYNOPSIS
6050     get_tmp_table_rec_length()
6051       items  IN subquery's select list.
6052 
6053   DESCRIPTION
6054     Calculate estimated record length for semi-join materialization
6055     temptable. It's an estimate because we don't follow every bit of
6056     create_tmp_table()'s logic. This isn't necessary as the return value of
6057     this function is used only for cost calculations.
6058 
6059   RETURN
6060     Length of the temptable record, in bytes
6061 */
6062 
get_tmp_table_rec_length(List<Item> & items)6063 static uint get_tmp_table_rec_length(List<Item> &items)
6064 {
6065   uint len= 0;
6066   Item *item;
6067   List_iterator<Item> it(items);
6068   while ((item= it++))
6069   {
6070     switch (item->result_type()) {
6071     case REAL_RESULT:
6072       len += sizeof(double);
6073       break;
6074     case INT_RESULT:
6075       if (item->max_length >= (MY_INT32_NUM_DECIMAL_DIGITS - 1))
6076         len += 8;
6077       else
6078         len += 4;
6079       break;
6080     case STRING_RESULT:
6081       /* DATE/TIME and GEOMETRY fields have STRING_RESULT result type.  */
6082       if (item->is_temporal() || item->field_type() == MYSQL_TYPE_GEOMETRY)
6083         len += 8;
6084       else
6085         len += item->max_length;
6086       break;
6087     case DECIMAL_RESULT:
6088       len += 10;
6089       break;
6090     case ROW_RESULT:
6091     default:
6092       assert(0); /* purecov: deadcode */
6093       break;
6094     }
6095   }
6096   return len;
6097 }
6098 
6099 
6100 /**
6101    Writes to the optimizer trace information about dependencies between
6102    tables.
6103    @param trace  optimizer trace
6104    @param join_tabs  all JOIN_TABs of the join
6105    @param table_count how many JOIN_TABs in the 'join_tabs' array
6106 */
trace_table_dependencies(Opt_trace_context * trace,JOIN_TAB * join_tabs,uint table_count)6107 static void trace_table_dependencies(Opt_trace_context * trace,
6108                                      JOIN_TAB *join_tabs,
6109                                      uint table_count)
6110 {
6111   Opt_trace_object trace_wrapper(trace);
6112   Opt_trace_array trace_dep(trace, "table_dependencies");
6113   for (uint i= 0 ; i < table_count ; i++)
6114   {
6115     TABLE_LIST *table_ref= join_tabs[i].table_ref;
6116     Opt_trace_object trace_one_table(trace);
6117     trace_one_table.add_utf8_table(table_ref).
6118       add("row_may_be_null", table_ref->table->is_nullable());
6119     const table_map map= table_ref->map();
6120     assert(map < (1ULL << table_count));
6121     for (uint j= 0; j < table_count; j++)
6122     {
6123       if (map & (1ULL << j))
6124       {
6125         trace_one_table.add("map_bit", j);
6126         break;
6127       }
6128     }
6129     Opt_trace_array depends_on(trace, "depends_on_map_bits");
6130     // RAND_TABLE_BIT may be in join_tabs[i].dependent, so we test all 64 bits
6131     compile_time_assert(sizeof(table_ref->map()) <= 64);
6132     for (uint j= 0; j < 64; j++)
6133     {
6134       if (join_tabs[i].dependent & (1ULL << j))
6135         depends_on.add(j);
6136     }
6137   }
6138 }
6139 
6140 
6141 /**
6142   Add to join_tab[i]->condition() "table.field IS NOT NULL" conditions
6143   we've inferred from ref/eq_ref access performed.
6144 
6145     This function is a part of "Early NULL-values filtering for ref access"
6146     optimization.
6147 
6148     Example of this optimization:
6149     For query SELECT * FROM t1,t2 WHERE t2.key=t1.field @n
6150     and plan " any-access(t1), ref(t2.key=t1.field) " @n
6151     add "t1.field IS NOT NULL" to t1's table condition. @n
6152 
6153     Description of the optimization:
6154 
6155       We look through equalities chosen to perform ref/eq_ref access,
6156       pick equalities that have form "tbl.part_of_key = othertbl.field"
6157       (where othertbl is a non-const table and othertbl.field may be NULL)
6158       and add them to conditions on corresponding tables (othertbl in this
6159       example).
6160 
6161       Exception from that is the case when referred_tab->join != join.
6162       I.e. don't add NOT NULL constraints from any embedded subquery.
6163       Consider this query:
6164       @code
6165       SELECT A.f2 FROM t1 LEFT JOIN t2 A ON A.f2 = f1
6166       WHERE A.f3=(SELECT MIN(f3) FROM  t2 C WHERE A.f4 = C.f4) OR A.f3 IS NULL;
6167       @endcode
6168       Here condition A.f3 IS NOT NULL is going to be added to the WHERE
6169       condition of the embedding query.
6170       Another example:
6171       SELECT * FROM t10, t11 WHERE (t10.a < 10 OR t10.a IS NULL)
6172       AND t11.b <=> t10.b AND (t11.a = (SELECT MAX(a) FROM t12
6173       WHERE t12.b = t10.a ));
6174       Here condition t10.a IS NOT NULL is going to be added.
6175       In both cases addition of NOT NULL condition will erroneously reject
6176       some rows of the result set.
6177       referred_tab->join != join constraint would disallow such additions.
6178 
6179       This optimization doesn't affect the choices that ref, range, or join
6180       optimizer make. This was intentional because this was added after 4.1
6181       was GA.
6182 
6183     Implementation overview
6184       1. update_ref_and_keys() accumulates info about null-rejecting
6185          predicates in Key_field::null_rejecting
6186       1.1 add_key_part saves these to Key_use.
6187       2. create_ref_for_key copies them to TABLE_REF.
6188       3. add_not_null_conds adds "x IS NOT NULL" to join_tab->m_condition of
6189          appropriate JOIN_TAB members.
6190 */
6191 
add_not_null_conds(JOIN * join)6192 static void add_not_null_conds(JOIN *join)
6193 {
6194   DBUG_ENTER("add_not_null_conds");
6195   ASSERT_BEST_REF_IN_JOIN_ORDER(join);
6196   for (uint i=join->const_tables ; i < join->tables ; i++)
6197   {
6198     JOIN_TAB *const tab= join->best_ref[i];
6199     if ((tab->type() == JT_REF || tab->type() == JT_EQ_REF ||
6200          tab->type() == JT_REF_OR_NULL) &&
6201         !tab->table()->is_nullable())
6202     {
6203       for (uint keypart= 0; keypart < tab->ref().key_parts; keypart++)
6204       {
6205         if (tab->ref().null_rejecting & ((key_part_map)1 << keypart))
6206         {
6207           Item *item= tab->ref().items[keypart];
6208           Item *notnull;
6209           Item *real= item->real_item();
6210           assert(real->type() == Item::FIELD_ITEM);
6211           Item_field *not_null_item= (Item_field*)real;
6212           JOIN_TAB *referred_tab= not_null_item->field->table->reginfo.join_tab;
6213           /*
6214             For UPDATE queries such as:
6215             UPDATE t1 SET t1.f2=(SELECT MAX(t2.f4) FROM t2 WHERE t2.f3=t1.f1);
6216             not_null_item is the t1.f1, but it's referred_tab is 0.
6217           */
6218           if (!referred_tab || referred_tab->join() != join)
6219             continue;
6220           if (!(notnull= new Item_func_isnotnull(not_null_item)))
6221             DBUG_VOID_RETURN;
6222           /*
6223             We need to do full fix_fields() call here in order to have correct
6224             notnull->const_item(). This is needed e.g. by test_quick_select
6225             when it is called from make_join_select after this function is
6226             called.
6227           */
6228           if (notnull->fix_fields(join->thd, &notnull))
6229             DBUG_VOID_RETURN;
6230           DBUG_EXECUTE("where",print_where(notnull,
6231                                            referred_tab->table()->alias,
6232                                            QT_ORDINARY););
6233           referred_tab->and_with_condition(notnull);
6234         }
6235       }
6236     }
6237   }
6238   DBUG_VOID_RETURN;
6239 }
6240 
6241 
6242 /**
6243   Check if given expression only uses fields covered by index #keyno in the
6244   table tbl. The expression can use any fields in any other tables.
6245 
6246   The expression is guaranteed not to be AND or OR - those constructs are
6247   handled outside of this function.
6248 
6249   Restrict some function types from being pushed down to storage engine:
6250   a) Don't push down the triggered conditions. Nested outer joins execution
6251      code may need to evaluate a condition several times (both triggered and
6252      untriggered).
6253   b) Stored functions contain a statement that might start new operations (like
6254      DML statements) from within the storage engine. This does not work against
6255      all SEs.
6256   c) Subqueries might contain nested subqueries and involve more tables.
6257 
6258   @param  item           Expression to check
6259   @param  tbl            The table having the index
6260   @param  keyno          The index number
6261   @param  other_tbls_ok  TRUE <=> Fields of other non-const tables are allowed
6262 
6263   @return false if No, true if Yes
6264 */
6265 
uses_index_fields_only(Item * item,TABLE * tbl,uint keyno,bool other_tbls_ok)6266 bool uses_index_fields_only(Item *item, TABLE *tbl, uint keyno,
6267                             bool other_tbls_ok)
6268 {
6269   // Restrictions b and c.
6270   if (item->has_stored_program() || item->has_subquery())
6271     return false;
6272 
6273   if (item->const_item())
6274     return true;
6275 
6276   const Item::Type item_type= item->type();
6277 
6278   switch (item_type) {
6279   case Item::FUNC_ITEM:
6280     {
6281       Item_func *item_func= (Item_func*)item;
6282       const Item_func::Functype func_type= item_func->functype();
6283 
6284       /*
6285         Restriction a.
6286         TODO: Consider cloning the triggered condition and using the copies
6287         for:
6288         1. push the first copy down, to have most restrictive index condition
6289            possible.
6290         2. Put the second copy into tab->m_condition.
6291       */
6292       if (func_type == Item_func::TRIG_COND_FUNC)
6293         return false;
6294 
6295       /* This is a function, apply condition recursively to arguments */
6296       if (item_func->argument_count() > 0)
6297       {
6298         Item **item_end= (item_func->arguments()) + item_func->argument_count();
6299         for (Item **child= item_func->arguments(); child != item_end; child++)
6300         {
6301           if (!uses_index_fields_only(*child, tbl, keyno, other_tbls_ok))
6302             return FALSE;
6303         }
6304       }
6305       return TRUE;
6306     }
6307   case Item::COND_ITEM:
6308     {
6309       /*
6310         This is a AND/OR condition. Regular AND/OR clauses are handled by
6311         make_cond_for_index() which will chop off the part that can be
6312         checked with index. This code is for handling non-top-level AND/ORs,
6313         e.g. func(x AND y).
6314       */
6315       List_iterator<Item> li(*((Item_cond*)item)->argument_list());
6316       Item *item;
6317       while ((item=li++))
6318       {
6319         if (!uses_index_fields_only(item, tbl, keyno, other_tbls_ok))
6320           return FALSE;
6321       }
6322       return TRUE;
6323     }
6324   case Item::FIELD_ITEM:
6325     {
6326       Item_field *item_field= (Item_field*)item;
6327       if (item_field->field->table != tbl)
6328         return other_tbls_ok;
6329       /*
6330         The below is probably a repetition - the first part checks the
6331         other two, but let's play it safe:
6332       */
6333       return item_field->field->part_of_key.is_set(keyno) &&
6334              item_field->field->type() != MYSQL_TYPE_GEOMETRY &&
6335              item_field->field->type() != MYSQL_TYPE_BLOB;
6336     }
6337   case Item::REF_ITEM:
6338     return uses_index_fields_only(item->real_item(), tbl, keyno,
6339                                   other_tbls_ok);
6340   default:
6341     return FALSE; /* Play it safe, don't push unknown non-const items */
6342   }
6343 }
6344 
6345 
6346 /**
6347   Optimize semi-join nests that could be run with sj-materialization
6348 
6349   @param join           The join to optimize semi-join nests for
6350 
6351   @details
6352     Optimize each of the semi-join nests that can be run with
6353     materialization. For each of the nests, we
6354      - Generate the best join order for this "sub-join" and remember it;
6355      - Remember the sub-join execution cost (it's part of materialization
6356        cost);
6357      - Calculate other costs that will be incurred if we decide
6358        to use materialization strategy for this semi-join nest.
6359 
6360     All obtained information is saved and will be used by the main join
6361     optimization pass.
6362 
6363   @return false if successful, true if error
6364 */
6365 
optimize_semijoin_nests_for_materialization(JOIN * join)6366 static bool optimize_semijoin_nests_for_materialization(JOIN *join)
6367 {
6368   DBUG_ENTER("optimize_semijoin_nests_for_materialization");
6369   List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests);
6370   TABLE_LIST *sj_nest;
6371   Opt_trace_context * const trace= &join->thd->opt_trace;
6372 
6373   while ((sj_nest= sj_list_it++))
6374   {
6375     /* As a precaution, reset pointers that were used in prior execution */
6376     sj_nest->nested_join->sjm.positions= NULL;
6377 
6378     /* Calculate the cost of materialization if materialization is allowed. */
6379     if (sj_nest->nested_join->sj_enabled_strategies &
6380         OPTIMIZER_SWITCH_MATERIALIZATION)
6381     {
6382       /* A semi-join nest should not contain tables marked as const */
6383       assert(!(sj_nest->sj_inner_tables & join->const_table_map));
6384 
6385       Opt_trace_object trace_wrapper(trace);
6386       Opt_trace_object
6387         trace_sjmat(trace, "execution_plan_for_potential_materialization");
6388       Opt_trace_array trace_sjmat_steps(trace, "steps");
6389       /*
6390         Try semijoin materialization if the semijoin is classified as
6391         non-trivially-correlated.
6392       */
6393       if (sj_nest->nested_join->sj_corr_tables)
6394         continue;
6395       /*
6396         Check whether data types allow execution with materialization.
6397       */
6398       semijoin_types_allow_materialization(sj_nest);
6399 
6400       if (!sj_nest->nested_join->sjm.scan_allowed &&
6401           !sj_nest->nested_join->sjm.lookup_allowed)
6402         continue;
6403 
6404       if (Optimize_table_order(join->thd, join, sj_nest).choose_table_order())
6405         DBUG_RETURN(true);
6406       const uint n_tables= my_count_bits(sj_nest->sj_inner_tables);
6407       calculate_materialization_costs(join, sj_nest, n_tables,
6408                                       &sj_nest->nested_join->sjm);
6409       /*
6410         Cost data is in sj_nest->nested_join->sjm. We also need to save the
6411         plan:
6412       */
6413       if (!(sj_nest->nested_join->sjm.positions=
6414             (st_position*)join->thd->alloc(sizeof(st_position)*n_tables)))
6415         DBUG_RETURN(true);
6416       memcpy(sj_nest->nested_join->sjm.positions,
6417              join->best_positions + join->const_tables,
6418              sizeof(st_position) * n_tables);
6419     }
6420   }
6421   DBUG_RETURN(false);
6422 }
6423 
6424 
6425 /*
6426   Check if table's Key_use elements have an eq_ref(outer_tables) candidate
6427 
6428   SYNOPSIS
6429     find_eq_ref_candidate()
6430       tl                Table to be checked
6431       sj_inner_tables   Bitmap of inner tables. eq_ref(inner_table) doesn't
6432                         count.
6433 
6434   DESCRIPTION
6435     Check if table's Key_use elements have an eq_ref(outer_tables) candidate
6436 
6437   TODO
6438     Check again if it is feasible to factor common parts with constant table
6439     search
6440 
6441   RETURN
6442     TRUE  - There exists an eq_ref(outer-tables) candidate
6443     FALSE - Otherwise
6444 */
6445 
find_eq_ref_candidate(TABLE_LIST * tl,table_map sj_inner_tables)6446 static bool find_eq_ref_candidate(TABLE_LIST *tl, table_map sj_inner_tables)
6447 {
6448   Key_use *keyuse= tl->table->reginfo.join_tab->keyuse();
6449 
6450   if (keyuse)
6451   {
6452     while (1) /* For each key */
6453     {
6454       const uint key= keyuse->key;
6455       KEY *const keyinfo= tl->table->key_info + key;
6456       key_part_map bound_parts= 0;
6457       if ((keyinfo->flags & (HA_NOSAME)) == HA_NOSAME)
6458       {
6459         do  /* For all equalities on all key parts */
6460         {
6461           /* Check if this is "t.keypart = expr(outer_tables) */
6462           if (!(keyuse->used_tables & sj_inner_tables) &&
6463               !(keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL))
6464           {
6465             /*
6466               Consider only if the resulting condition does not pass a NULL
6467               value through. Especially needed for a UNIQUE index on NULLable
6468               columns where a duplicate row is possible with NULL values.
6469             */
6470             if (keyuse->null_rejecting || !keyuse->val->maybe_null ||
6471                 !keyinfo->key_part[keyuse->keypart].field->maybe_null())
6472               bound_parts|= (key_part_map)1 << keyuse->keypart;
6473           }
6474           keyuse++;
6475         } while (keyuse->key == key && keyuse->table_ref == tl);
6476 
6477         if (bound_parts == LOWER_BITS(uint, keyinfo->user_defined_key_parts))
6478           return true;
6479         if (keyuse->table_ref != tl)
6480           return false;
6481       }
6482       else
6483       {
6484         do
6485         {
6486           keyuse++;
6487           if (keyuse->table_ref != tl)
6488             return false;
6489         }
6490         while (keyuse->key == key);
6491       }
6492     }
6493   }
6494   return false;
6495 }
6496 
6497 
6498 /**
6499   Pull tables out of semi-join nests based on functional dependencies
6500 
6501   @param join  The join where to do the semi-join table pullout
6502 
6503   @return False if successful, true if error (Out of memory)
6504 
6505   @details
6506     Pull tables out of semi-join nests based on functional dependencies,
6507     ie. if a table is accessed via eq_ref(outer_tables).
6508     The function may be called several times, the caller is responsible
6509     for setting up proper key information that this function acts upon.
6510 
6511     PRECONDITIONS
6512     When this function is called, the join may have several semi-join nests
6513     but it is guaranteed that one semi-join nest does not contain another.
6514     For functionally dependent tables to be pulled out, key information must
6515     have been calculated (see update_ref_and_keys()).
6516 
6517     POSTCONDITIONS
6518      * Tables that were pulled out are removed from the semi-join nest they
6519        belonged to and added to the parent join nest.
6520      * For these tables, the used_tables and not_null_tables fields of
6521        the semi-join nest they belonged to will be adjusted.
6522        The semi-join nest is also marked as correlated, and
6523        sj_corr_tables and sj_depends_on are adjusted if necessary.
6524      * Semi-join nests' sj_inner_tables is set equal to used_tables
6525 
6526     NOTE
6527     Table pullout may make uncorrelated subquery correlated. Consider this
6528     example:
6529 
6530      ... WHERE oe IN (SELECT it1.primary_key WHERE p(it1, it2) ... )
6531 
6532     here table it1 can be pulled out (we have it1.primary_key=oe which gives
6533     us functional dependency). Once it1 is pulled out, all references to it1
6534     from p(it1, it2) become references to outside of the subquery and thus
6535     make the subquery (i.e. its semi-join nest) correlated.
6536     Making the subquery (i.e. its semi-join nest) correlated prevents us from
6537     using Materialization or LooseScan to execute it.
6538 */
6539 
pull_out_semijoin_tables(JOIN * join)6540 static bool pull_out_semijoin_tables(JOIN *join)
6541 {
6542   TABLE_LIST *sj_nest;
6543   DBUG_ENTER("pull_out_semijoin_tables");
6544 
6545   assert(!join->select_lex->sj_nests.is_empty());
6546 
6547   List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests);
6548   Opt_trace_context * const trace= &join->thd->opt_trace;
6549   Opt_trace_object trace_wrapper(trace);
6550   Opt_trace_array trace_pullout(trace, "pulled_out_semijoin_tables");
6551 
6552   /* Try pulling out tables from each semi-join nest */
6553   while ((sj_nest= sj_list_it++))
6554   {
6555     table_map pulled_tables= 0;
6556     List_iterator<TABLE_LIST> child_li(sj_nest->nested_join->join_list);
6557     TABLE_LIST *tbl;
6558     /*
6559       Calculate set of tables within this semi-join nest that have
6560       other dependent tables
6561     */
6562     table_map dep_tables= 0;
6563     while ((tbl= child_li++))
6564     {
6565       TABLE *const table= tbl->table;
6566       if (table &&
6567          (table->reginfo.join_tab->dependent &
6568           sj_nest->nested_join->used_tables))
6569         dep_tables|= table->reginfo.join_tab->dependent;
6570     }
6571     /*
6572       Find which tables we can pull out based on key dependency data.
6573       Note that pulling one table out can allow us to pull out some
6574       other tables too.
6575     */
6576     bool pulled_a_table;
6577     do
6578     {
6579       pulled_a_table= FALSE;
6580       child_li.rewind();
6581       while ((tbl= child_li++))
6582       {
6583         if (tbl->table &&
6584             !(pulled_tables & tbl->map()) &&
6585             !(dep_tables & tbl->map()))
6586         {
6587           if (find_eq_ref_candidate(tbl,
6588                                     sj_nest->nested_join->used_tables &
6589                                     ~pulled_tables))
6590           {
6591             pulled_a_table= TRUE;
6592             pulled_tables |= tbl->map();
6593             Opt_trace_object(trace).add_utf8_table(tbl).
6594               add("functionally_dependent", true);
6595             /*
6596               Pulling a table out of uncorrelated subquery in general makes
6597               it correlated. See the NOTE to this function.
6598             */
6599             sj_nest->nested_join->sj_corr_tables|= tbl->map();
6600             sj_nest->nested_join->sj_depends_on|= tbl->map();
6601           }
6602         }
6603       }
6604     } while (pulled_a_table);
6605 
6606     child_li.rewind();
6607     /*
6608       Move the pulled out TABLE_LIST elements to the parents.
6609     */
6610     sj_nest->nested_join->used_tables&= ~pulled_tables;
6611     sj_nest->nested_join->not_null_tables&= ~pulled_tables;
6612 
6613     /* sj_inner_tables is a copy of nested_join->used_tables */
6614     sj_nest->sj_inner_tables= sj_nest->nested_join->used_tables;
6615 
6616     if (pulled_tables)
6617     {
6618       List<TABLE_LIST> *upper_join_list= (sj_nest->embedding != NULL) ?
6619           &sj_nest->embedding->nested_join->join_list :
6620           &join->select_lex->top_join_list;
6621 
6622       Prepared_stmt_arena_holder ps_arena_holder(join->thd);
6623 
6624       while ((tbl= child_li++))
6625       {
6626         if (tbl->table &&
6627             !(sj_nest->nested_join->used_tables & tbl->map()))
6628         {
6629           /*
6630             Pull the table up in the same way as simplify_joins() does:
6631             update join_list and embedding pointers but keep next[_local]
6632             pointers.
6633           */
6634           child_li.remove();
6635 
6636           if (upper_join_list->push_back(tbl))
6637             DBUG_RETURN(TRUE);
6638 
6639           tbl->join_list= upper_join_list;
6640           tbl->embedding= sj_nest->embedding;
6641         }
6642       }
6643 
6644       /* Remove the sj-nest itself if we've removed everything from it */
6645       if (!sj_nest->nested_join->used_tables)
6646       {
6647         List_iterator<TABLE_LIST> li(*upper_join_list);
6648         /* Find the sj_nest in the list. */
6649         while (sj_nest != li++)
6650         {}
6651         li.remove();
6652         /* Also remove it from the list of SJ-nests: */
6653         sj_list_it.remove();
6654       }
6655     }
6656   }
6657   DBUG_RETURN(FALSE);
6658 }
6659 
6660 
6661 /**
6662   @defgroup RefOptimizerModule Ref Optimizer
6663 
6664   @{
6665 
6666   This module analyzes all equality predicates to determine the best
6667   independent ref/eq_ref/ref_or_null index access methods.
6668 
6669   The 'ref' optimizer determines the columns (and expressions over them) that
6670   reference columns in other tables via an equality, and analyzes which keys
6671   and key parts can be used for index lookup based on these references. The
6672   main outcomes of the 'ref' optimizer are:
6673 
6674   - A bi-directional graph of all equi-join conditions represented as an
6675     array of Key_use elements. This array is stored in JOIN::keyuse_array in
6676     table, key, keypart order. Each JOIN_TAB::keyuse points to the
6677     first Key_use element with the same table as JOIN_TAB::table.
6678 
  - The table dependencies needed by the optimizer to determine which
    tables must precede a given table so that they provide the
    necessary column bindings for the equality predicates.
6682 
6683   - Computed properties of the equality predicates such as null_rejecting
6684     and the result size of each separate condition.
6685 
6686   Updates in JOIN_TAB:
6687   - JOIN_TAB::keys       Bitmap of all used keys.
6688   - JOIN_TAB::const_keys Bitmap of all keys that may be used with quick_select.
6689   - JOIN_TAB::keyuse     Pointer to possible keys.
6690 */
6691 
6692 /**
6693   A Key_field is a descriptor of a predicate of the form (column <op> val).
6694   Currently 'op' is one of {'=', '<=>', 'IS [NOT] NULL', 'arg1 IN arg2'},
6695   and 'val' can be either another column or an expression (including constants).
6696 
6697   Key_field's are used to analyze columns that may potentially serve as
6698   parts of keys for index lookup. If 'field' is part of an index, then
6699   add_key_part() creates a corresponding Key_use object and inserts it
6700   into the JOIN::keyuse_array which is passed by update_ref_and_keys().
6701 
6702   The structure is used only during analysis of the candidate columns for
6703   index 'ref' access.
6704 */
6705 struct Key_field {
Key_fieldKey_field6706   Key_field(Item_field *item_field, Item *val, uint level,
6707             uint optimize, bool eq_func,
6708             bool null_rejecting, bool *cond_guard, uint sj_pred_no)
6709   : item_field(item_field), val(val), level(level),
6710     optimize(optimize), eq_func(eq_func),
6711     null_rejecting(null_rejecting), cond_guard(cond_guard),
6712     sj_pred_no(sj_pred_no)
6713   {}
6714   Item_field    *item_field;           ///< Item representing the column
6715   Item          *val;                  ///< May be empty if diff constant
6716   uint          level;
6717   uint          optimize;              ///< KEY_OPTIMIZE_*
6718   bool          eq_func;
6719   /**
6720     If true, the condition this struct represents will not be satisfied
6721     when val IS NULL.
6722     @sa Key_use::null_rejecting .
6723   */
6724   bool          null_rejecting;
6725   bool          *cond_guard;                    ///< @sa Key_use::cond_guard
6726   uint          sj_pred_no;                     ///< @sa Key_use::sj_pred_no
6727 };
6728 
6729 /* Values in optimize */
6730 #define KEY_OPTIMIZE_EXISTS		1
6731 #define KEY_OPTIMIZE_REF_OR_NULL	2
6732 
6733 /**
6734   Merge new key definitions to old ones, remove those not used in both.
6735 
6736   This is called for OR between different levels.
6737 
6738   To be able to do 'ref_or_null' we merge a comparison of a column
6739   and 'column IS NULL' to one test.  This is useful for sub select queries
6740   that are internally transformed to something like:.
6741 
6742   @code
6743   SELECT * FROM t1 WHERE t1.key=outer_ref_field or t1.key IS NULL
6744   @endcode
6745 
6746   Key_field::null_rejecting is processed as follows: @n
6747   result has null_rejecting=true if it is set for both ORed references.
6748   for example:
6749   -   (t2.key = t1.field OR t2.key  =  t1.field) -> null_rejecting=true
6750   -   (t2.key = t1.field OR t2.key <=> t1.field) -> null_rejecting=false
6751 
6752   @todo
6753     The result of this is that we're missing some 'ref' accesses.
6754     OptimizerTeam: Fix this
6755 */
6756 
6757 static Key_field *
merge_key_fields(Key_field * start,Key_field * new_fields,Key_field * end,uint and_level)6758 merge_key_fields(Key_field *start, Key_field *new_fields, Key_field *end,
6759                  uint and_level)
6760 {
6761   if (start == new_fields)
6762     return start;				// Impossible or
6763   if (new_fields == end)
6764     return start;				// No new fields, skip all
6765 
6766   Key_field *first_free=new_fields;
6767 
6768   /* Mark all found fields in old array */
6769   for (; new_fields != end ; new_fields++)
6770   {
6771     Field *const new_field= new_fields->item_field->field;
6772 
6773     for (Key_field *old=start ; old != first_free ; old++)
6774     {
6775       Field *const old_field= old->item_field->field;
6776 
6777       /*
6778         Check that the Field objects are the same, as we may have several
6779         Item_field objects pointing to the same Field:
6780       */
6781       if (old_field == new_field)
6782       {
6783         /*
6784           NOTE: below const_item() call really works as "!used_tables()", i.e.
6785           it can return FALSE where it is feasible to make it return TRUE.
6786 
6787           The cause is as follows: Some of the tables are already known to be
6788           const tables (the detection code is in JOIN::make_join_plan(),
6789           above the update_ref_and_keys() call), but we didn't propagate
6790           information about this: TABLE::const_table is not set to TRUE, and
6791           Item::update_used_tables() hasn't been called for each item.
6792           The result of this is that we're missing some 'ref' accesses.
6793           TODO: OptimizerTeam: Fix this
6794         */
6795         if (!new_fields->val->const_item())
6796         {
6797           /*
6798             If the value matches, we can use the key reference.
6799             If not, we keep it until we have examined all new values
6800           */
6801           if (old->val->eq(new_fields->val, old_field->binary()))
6802           {
6803             old->level= and_level;
6804             old->optimize= ((old->optimize & new_fields->optimize &
6805                              KEY_OPTIMIZE_EXISTS) |
6806                             ((old->optimize | new_fields->optimize) &
6807                              KEY_OPTIMIZE_REF_OR_NULL));
6808             old->null_rejecting= (old->null_rejecting &&
6809                                   new_fields->null_rejecting);
6810           }
6811         }
6812         else if (old->eq_func && new_fields->eq_func &&
6813                  old->val->eq_by_collation(new_fields->val,
6814                                            old_field->binary(),
6815                                            old_field->charset()))
6816         {
6817           old->level= and_level;
6818           old->optimize= ((old->optimize & new_fields->optimize &
6819                            KEY_OPTIMIZE_EXISTS) |
6820                           ((old->optimize | new_fields->optimize) &
6821                            KEY_OPTIMIZE_REF_OR_NULL));
6822           old->null_rejecting= (old->null_rejecting &&
6823                                 new_fields->null_rejecting);
6824         }
6825         else if (old->eq_func && new_fields->eq_func &&
6826                  ((old->val->const_item() && old->val->is_null()) ||
6827                   new_fields->val->is_null()))
6828         {
6829           /* field = expression OR field IS NULL */
6830           old->level= and_level;
6831           old->optimize= KEY_OPTIMIZE_REF_OR_NULL;
6832           /*
6833             Remember the NOT NULL value unless the value does not depend
6834             on other tables.
6835           */
6836           if (!old->val->used_tables() && old->val->is_null())
6837             old->val= new_fields->val;
6838           /* The referred expression can be NULL: */
6839           old->null_rejecting= 0;
6840 	}
6841 	else
6842 	{
6843 	  /*
6844 	    We are comparing two different const.  In this case we can't
6845 	    use a key-lookup on this so it's better to remove the value
6846 	    and let the range optimizer handle it
6847 	  */
6848 	  if (old == --first_free)		// If last item
6849 	    break;
6850 	  *old= *first_free;			// Remove old value
6851 	  old--;				// Retry this value
6852 	}
6853       }
6854     }
6855   }
6856   /* Remove all not used items */
6857   for (Key_field *old=start ; old != first_free ;)
6858   {
6859     if (old->level != and_level)
6860     {						// Not used in all levels
6861       if (old == --first_free)
6862         break;
6863       *old= *first_free;			// Remove old value
6864       continue;
6865     }
6866     old++;
6867   }
6868   return first_free;
6869 }
6870 
6871 
6872 /**
6873   Given a field, return its index in semi-join's select list, or UINT_MAX
6874 
6875   @param item_field Field to be looked up in select list
6876 
6877   @retval =UINT_MAX Field is not from a semijoin-transformed subquery
6878   @retval <UINT_MAX Index in select list of subquery
6879 
6880   @details
6881   Given a field, find its table; then see if the table is within a
6882   semi-join nest and if the field was in select list of the subquery
6883   (if subquery was part of a quantified comparison predicate), or
6884   the field was a result of subquery decorrelation.
6885   If it was, then return the field's index in the select list.
6886   The value is used by LooseScan strategy.
6887 */
6888 
get_semi_join_select_list_index(Item_field * item_field)6889 static uint get_semi_join_select_list_index(Item_field *item_field)
6890 {
6891   TABLE_LIST *emb_sj_nest= item_field->table_ref->embedding;
6892   if (emb_sj_nest && emb_sj_nest->sj_cond())
6893   {
6894     List<Item> &items= emb_sj_nest->nested_join->sj_inner_exprs;
6895     List_iterator<Item> it(items);
6896     for (uint i= 0; i < items.elements; i++)
6897     {
6898       Item *sel_item= it++;
6899       if (sel_item->type() == Item::FIELD_ITEM &&
6900           ((Item_field*)sel_item)->field->eq(item_field->field))
6901         return i;
6902     }
6903   }
6904   return UINT_MAX;
6905 }
6906 
6907 /**
6908    @brief
6909    If EXPLAIN EXTENDED  or if the --safe-updates option is enabled, add a
6910    warning that an index cannot be used for ref access
6911 
6912    @details
6913    If EXPLAIN EXTENDED or if the --safe-updates option is enabled, add a
6914    warning for each index that cannot be used for ref access due to either type
6915    conversion or different collations on the field used for comparison
6916 
6917    Example type conversion (char compared to int):
6918 
6919    CREATE TABLE t1 (url char(1) PRIMARY KEY);
6920    SELECT * FROM t1 WHERE url=1;
6921 
6922    Example different collations (danish vs german2):
6923 
6924    CREATE TABLE t1 (url char(1) PRIMARY KEY) collate latin1_danish_ci;
6925    SELECT * FROM t1 WHERE url='1' collate latin1_german2_ci;
6926 
6927    @param thd                Thread for the connection that submitted the query
6928    @param field              Field used in comparision
6929    @param cant_use_index   Indexes that cannot be used for lookup
6930  */
6931 static void
warn_index_not_applicable(THD * thd,const Field * field,const key_map cant_use_index)6932 warn_index_not_applicable(THD *thd, const Field *field,
6933                           const key_map cant_use_index)
6934 {
6935   if (thd->lex->describe ||
6936       thd->variables.option_bits & OPTION_SAFE_UPDATES)
6937     for (uint j=0 ; j < field->table->s->keys ; j++)
6938       if (cant_use_index.is_set(j))
6939         push_warning_printf(thd,
6940                             Sql_condition::SL_WARNING,
6941                             ER_WARN_INDEX_NOT_APPLICABLE,
6942                             ER(ER_WARN_INDEX_NOT_APPLICABLE),
6943                             "ref",
6944                             field->table->key_info[j].name,
6945                             field->field_name);
6946 }
6947 
6948 /**
6949   Add a possible key to array of possible keys if it's usable as a key
6950 
6951   @param key_fields[in,out] Used as an input paramater in the sense that it is a
6952   pointer to a pointer to a memory area where an array of Key_field objects will
6953   stored. It is used as an out parameter in the sense that the pointer will be
6954   updated to point beyond the last Key_field written.
6955 
6956   @param and_level       And level, to be stored in Key_field
6957   @param cond            Condition predicate
6958   @param field           Field used in comparision
6959   @param eq_func         True if we used =, <=> or IS NULL
6960   @param value           Array of values used for comparison with field
6961   @param num_values      Number of elements in the array of values
6962   @param usable_tables   Tables which can be used for key optimization
6963   @param sargables       IN/OUT Array of found sargable candidates. Will be
6964                          ignored in case eq_func is true.
6965 
6966   @note
6967     If we are doing a NOT NULL comparison on a NOT NULL field in a outer join
6968     table, we store this to be able to do not exists optimization later.
6969 
6970   @return
6971     *key_fields is incremented if we stored a key in the array
6972 */
6973 
6974 static void
add_key_field(Key_field ** key_fields,uint and_level,Item_func * cond,Item_field * item_field,bool eq_func,Item ** value,uint num_values,table_map usable_tables,SARGABLE_PARAM ** sargables)6975 add_key_field(Key_field **key_fields, uint and_level, Item_func *cond,
6976               Item_field *item_field, bool eq_func, Item **value,
6977               uint num_values, table_map usable_tables,
6978               SARGABLE_PARAM **sargables)
6979 {
6980   assert(eq_func || sargables);
6981 
6982   Field *const field= item_field->field;
6983   TABLE_LIST *const tl= item_field->table_ref;
6984 
6985   if (tl->table->reginfo.join_tab == NULL)
6986   {
6987     /*
6988        Due to a bug in IN-to-EXISTS (grep for real_item() in item_subselect.cc
6989        for more info), an index over a field from an outer query might be
6990        considered here, which is incorrect. Their query has been fully
6991        optimized already so their reginfo.join_tab is NULL and we reject them.
6992     */
6993     return;
6994   }
6995 
6996   DBUG_PRINT("info", ("add_key_field for field %s", field->field_name));
6997   uint exists_optimize= 0;
6998   if (!tl->derived_keys_ready && tl->uses_materialization() &&
6999       !tl->table->is_created() &&
7000       tl->update_derived_keys(field, value, num_values))
7001     return;
7002   if (!(field->flags & PART_KEY_FLAG))
7003   {
7004     // Don't remove column IS NULL on a LEFT JOIN table
7005     if (!eq_func || (*value)->type() != Item::NULL_ITEM ||
7006         !tl->table->is_nullable() || field->real_maybe_null())
7007       return;					// Not a key. Skip it
7008     exists_optimize= KEY_OPTIMIZE_EXISTS;
7009     assert(num_values == 1);
7010   }
7011   else
7012   {
7013     table_map used_tables= 0;
7014     bool optimizable= false;
7015     for (uint i=0; i<num_values; i++)
7016     {
7017       used_tables|=(value[i])->used_tables();
7018       if (!((value[i])->used_tables() & (tl->map() | RAND_TABLE_BIT)))
7019         optimizable= true;
7020     }
7021     if (!optimizable)
7022       return;
7023     if (!(usable_tables & tl->map()))
7024     {
7025       if (!eq_func || (*value)->type() != Item::NULL_ITEM ||
7026           !tl->table->is_nullable() || field->real_maybe_null())
7027         return; // Can't use left join optimize
7028       exists_optimize= KEY_OPTIMIZE_EXISTS;
7029     }
7030     else
7031     {
7032       JOIN_TAB *stat= tl->table->reginfo.join_tab;
7033       key_map possible_keys=field->key_start;
7034       possible_keys.intersect(tl->table->keys_in_use_for_query);
7035       stat[0].keys().merge(possible_keys);             // Add possible keys
7036 
7037       /*
7038         Save the following cases:
7039         Field op constant
7040         Field LIKE constant where constant doesn't start with a wildcard
7041         Field = field2 where field2 is in a different table
7042         Field op formula
7043         Field IS NULL
7044         Field IS NOT NULL
7045         Field BETWEEN ...
7046         Field IN ...
7047       */
7048       stat[0].key_dependent|=used_tables;
7049 
7050       bool is_const= true;
7051       for (uint i=0; i<num_values; i++)
7052       {
7053         if (!(is_const&= value[i]->const_item()))
7054           break;
7055       }
7056       if (is_const)
7057         stat[0].const_keys.merge(possible_keys);
7058       else if (!eq_func)
7059       {
7060         /*
7061           Save info to be able check whether this predicate can be
7062           considered as sargable for range analysis after reading const tables.
7063           We do not save info about equalities as update_const_equal_items
7064           will take care of updating info on keys from sargable equalities.
7065         */
7066         assert(sargables);
7067         (*sargables)--;
7068         /*
7069           The sargables and key_fields arrays share the same memory
7070           buffer, and grow from opposite directions, so make sure they
7071           don't cross.
7072         */
7073         assert(*sargables > *reinterpret_cast<SARGABLE_PARAM**>(key_fields));
7074         (*sargables)->field= field;
7075         (*sargables)->arg_value= value;
7076         (*sargables)->num_values= num_values;
7077       }
7078       /*
7079         We can't always use indexes when comparing a string index to a
7080         number. cmp_type() is checked to allow compare of dates to numbers.
7081         eq_func is NEVER true when num_values > 1
7082        */
7083       if (!eq_func)
7084         return;
7085 
7086       /*
7087         Check if the field and value are comparable in the index.
7088         @todo: This code is almost identical to comparable_in_index()
7089         in opt_range.cc. Consider replacing the checks below with a
7090         function call to comparable_in_index()
7091       */
7092       if (field->result_type() == STRING_RESULT)
7093       {
7094         if ((*value)->result_type() != STRING_RESULT)
7095         {
7096           if (field->cmp_type() != (*value)->result_type())
7097           {
7098             warn_index_not_applicable(stat->join()->thd, field, possible_keys);
7099             return;
7100           }
7101         }
7102         else
7103         {
7104           /*
7105             Can't optimize datetime_column=indexed_varchar_column,
7106             also can't use indexes if the effective collation
7107             of the operation differ from the field collation.
7108             IndexedTimeComparedToDate: can't optimize
7109             'indexed_time = temporal_expr_with_date_part' because:
7110             - without index, a TIME column with value '48:00:00' is equal to a
7111             DATETIME column with value 'CURDATE() + 2 days'
7112             - with ref access into the TIME column, CURDATE() + 2 days becomes
7113             "00:00:00" (Field_timef::store_internal() simply extracts the time
7114             part from the datetime) which is a lookup key which does not match
7115             "48:00:00"; so ref access is not be able to give the same result
7116             as without index, so is disabled.
7117             On the other hand, we can optimize indexed_datetime = time
7118             because Field_temporal_with_date::store_time() will convert
7119             48:00:00 to CURDATE() + 2 days which is the correct lookup key.
7120           */
7121           if ((!field->is_temporal() && value[0]->is_temporal()) ||
7122               (field->cmp_type() == STRING_RESULT &&
7123                field->charset() != cond->compare_collation()) ||
7124               field_time_cmp_date(field, value[0]))
7125           {
7126             warn_index_not_applicable(stat->join()->thd, field, possible_keys);
7127             return;
7128           }
7129         }
7130       }
7131 
7132       /*
7133         We can't use indexes when comparing to a JSON value. For example,
7134         the string '{}' should compare equal to the JSON string "{}". If
7135         we use a string index to compare the two strings, we will be
7136         comparing '{}' and '"{}"', which don't compare equal.
7137       */
7138       if (value[0]->result_type() == STRING_RESULT &&
7139           value[0]->field_type() == MYSQL_TYPE_JSON)
7140       {
7141         warn_index_not_applicable(stat->join()->thd, field, possible_keys);
7142         return;
7143       }
7144     }
7145   }
7146   /*
7147     For the moment eq_func is always true. This slot is reserved for future
7148     extensions where we want to remembers other things than just eq comparisons
7149   */
7150   assert(eq_func);
7151   /*
7152     If the condition has form "tbl.keypart = othertbl.field" and
7153     othertbl.field can be NULL, there will be no matches if othertbl.field
7154     has NULL value.
7155     We use null_rejecting in add_not_null_conds() to add
7156     'othertbl.field IS NOT NULL' to tab->m_condition, if this is not an outer
7157     join. We also use it to shortcut reading "tbl" when othertbl.field is
7158     found to be a NULL value (in join_read_always_key() and BKA).
7159   */
7160   Item *const real= (*value)->real_item();
7161   const bool null_rejecting=
7162       ((cond->functype() == Item_func::EQ_FUNC) ||
7163        (cond->functype() == Item_func::MULT_EQUAL_FUNC)) &&
7164       (real->type() == Item::FIELD_ITEM) &&
7165       ((Item_field*)real)->field->maybe_null();
7166 
7167   /* Store possible eq field */
7168   new (*key_fields)
7169     Key_field(item_field, *value, and_level, exists_optimize, eq_func,
7170               null_rejecting, NULL,
7171               get_semi_join_select_list_index(item_field));
7172   (*key_fields)++;
7173   /*
7174     The sargables and key_fields arrays share the same memory buffer,
7175     and grow from opposite directions, so make sure they don't
7176     cross. But if sargables was NULL, eq_func had to be true and we
7177     don't write any sargables.
7178   */
7179   assert(sargables == NULL ||
7180          *key_fields < *reinterpret_cast<Key_field**>(sargables));
7181 }
7182 
7183 /**
7184   Add possible keys to array of possible keys originated from a simple
7185   predicate.
7186 
7187     @param  key_fields     Pointer to add key, if usable
7188     @param  and_level      And level, to be stored in Key_field
7189     @param  cond           Condition predicate
7190     @param  field_item     Field used in comparision
7191     @param  eq_func        True if we used =, <=> or IS NULL
7192     @param  val            Value used for comparison with field
7193                            Is NULL for BETWEEN and IN
7194     @param  usable_tables  Tables which can be used for key optimization
7195     @param  sargables      IN/OUT Array of found sargable candidates
7196 
7197   @note
7198     If field items f1 and f2 belong to the same multiple equality and
7199     a key is added for f1, the the same key is added for f2.
7200 
7201   @returns
7202     *key_fields is incremented if we stored a key in the array
7203 */
7204 
7205 static void
add_key_equal_fields(Key_field ** key_fields,uint and_level,Item_func * cond,Item_field * field_item,bool eq_func,Item ** val,uint num_values,table_map usable_tables,SARGABLE_PARAM ** sargables)7206 add_key_equal_fields(Key_field **key_fields, uint and_level,
7207                      Item_func *cond, Item_field *field_item,
7208                      bool eq_func, Item **val,
7209                      uint num_values, table_map usable_tables,
7210                      SARGABLE_PARAM **sargables)
7211 {
7212   DBUG_ENTER("add_key_equal_fields");
7213 
7214   add_key_field(key_fields, and_level, cond, field_item,
7215                 eq_func, val, num_values, usable_tables, sargables);
7216   Item_equal *item_equal= field_item->item_equal;
7217   if (item_equal)
7218   {
7219     /*
7220       Add to the set of possible key values every substitution of
7221       the field for an equal field included into item_equal
7222     */
7223     Item_equal_iterator it(*item_equal);
7224     Item_field *item;
7225     while ((item= it++))
7226     {
7227       if (!field_item->field->eq(item->field))
7228         add_key_field(key_fields, and_level, cond, item,
7229                       eq_func, val, num_values, usable_tables,
7230                       sargables);
7231     }
7232   }
7233   DBUG_VOID_RETURN;
7234 }
7235 
7236 
7237 /**
7238   Check if an expression is a non-outer field.
7239 
7240   Checks if an expression is a field and belongs to the current select.
7241 
7242   @param   field  Item expression to check
7243 
7244   @return boolean
7245      @retval TRUE   the expression is a local field
7246      @retval FALSE  it's something else
7247 */
7248 
7249 static bool
is_local_field(Item * field)7250 is_local_field (Item *field)
7251 {
7252   return field->real_item()->type() == Item::FIELD_ITEM &&
7253     !(field->used_tables() & OUTER_REF_TABLE_BIT) &&
7254     !down_cast<Item_ident *>(field)->depended_from &&
7255     !down_cast<Item_ident *>(field->real_item())->depended_from;
7256 }
7257 
7258 
7259 /**
7260   Check if a row constructor expression is over columns in the same query block.
7261 
7262   @param item_row Row expression to check.
7263 
7264   @return boolean
7265   @retval true  The expression is a local column reference.
7266   @retval false It's something else.
7267 */
is_row_of_local_columns(Item_row * item_row)7268 static bool is_row_of_local_columns(Item_row *item_row)
7269 {
7270   for (uint i= 0; i < item_row->cols(); ++i)
7271     if (!is_local_field(item_row->element_index(i)))
7272       return false;
7273   return true;
7274 }
7275 
7276 
7277 /**
7278    The guts of the ref optimizer. This function, along with the other
7279    add_key_* functions, make up a recursive procedure that analyzes a
7280    condition expression (a tree of AND and OR predicates) and does
7281    many things.
7282 
7283    @param join The query block involving the condition.
7284 
   @param[in,out] key_fields Start of memory buffer, see below.
   @param[in,out] and_level Current 'and level', see below.
   @param cond The conditional expression to analyze.
   @param usable_tables Tables not in this bitmap will not be examined.
   @param[in,out] sargables End of memory buffer, see below.
7290 
7291    This documentation is the result of reverse engineering and may
7292    therefore not capture the full gist of the procedure, but it is
7293    known to do the following:
7294 
7295    - Populate a raw memory buffer from two directions at the same time. An
7296      'array' of Key_field objects fill the buffer from low to high addresses
7297      whilst an 'array' of SARGABLE_PARAM's fills the buffer from high to low
7298      addresses. At the first call to this function, it is assumed that
7299      key_fields points to the beginning of the buffer and sargables point to the
7300      end (except for a poor-mans 'null element' at the very end).
7301 
7302    - Update a number of properties in the JOIN_TAB's that can be used
7303      to find search keys (sargables).
7304 
7305      - JOIN_TAB::keys
7306      - JOIN_TAB::key_dependent
7307      - JOIN_TAB::const_keys (dictates if the range optimizer will be run
7308        later.)
7309 
7310    The Key_field objects are marked with something called an 'and_level', which
7311    does @b not correspond to their nesting depth within the expression tree. It
7312    is rather a tag to group conjunctions together. For instance, in the
7313    conditional expression
7314 
7315    @code
7316      a = 0 AND b = 0
7317    @endcode
7318 
7319    two Key_field's are produced, both having an and_level of 0.
7320 
7321    In an expression such as
7322 
7323    @code
7324      a = 0 AND b = 0 OR a = 1
7325    @endcode
7326 
7327    three Key_field's are produced, the first two corresponding to 'a = 0' and
7328    'b = 0', respectively, both with and_level 0. The third one corresponds to
7329    'a = 1' and has an and_level of 1.
7330 
   A separate function, merge_key_fields(), performs ref access validation on
   the Key_field array on the recursive ascent. If some Key_field's cannot be
   used for ref access, the key_fields pointer is rolled back. All other
   modifications to the query plan remain.
7335 */
static void
add_key_fields(JOIN *join, Key_field **key_fields, uint *and_level,
               Item *cond, table_map usable_tables,
               SARGABLE_PARAM **sargables)
{
  DBUG_ENTER("add_key_fields");
  /*
    AND/OR node: recurse into the operands. Both branches below return
    before the predicate analysis further down is reached.
  */
  if (cond->type() == Item_func::COND_ITEM)
  {
    List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
    // First Key_field that will be written on behalf of this node.
    Key_field *org_key_fields= *key_fields;

    if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
    {
      Item *item;
      while ((item=li++))
        add_key_fields(join, key_fields, and_level, item, usable_tables,
                       sargables);
      /*
        Tag every Key_field produced by the operands of this AND with the
        and_level that is current once all operands have been processed.
      */
      for (; org_key_fields != *key_fields ; org_key_fields++)
        org_key_fields->level= *and_level;
    }
    else
    {
      /*
        Not an AND (i.e. OR): and_level is bumped before each operand. The
        first operand is processed on its own; after each further operand
        merge_key_fields() is handed the entries collected so far
        [org_key_fields, start_key_fields) and the entries of the new
        operand [start_key_fields, *key_fields), together with yet another
        incremented and_level, and returns the new end of the array.
      */
      (*and_level)++;
      add_key_fields(join, key_fields, and_level, li++, usable_tables,
                     sargables);
      Item *item;
      while ((item=li++))
      {
        Key_field *start_key_fields= *key_fields;
        (*and_level)++;
        add_key_fields(join, key_fields, and_level, item, usable_tables,
                       sargables);
        *key_fields=merge_key_fields(org_key_fields,start_key_fields,
                                     *key_fields,++(*and_level));
      }
    }
    DBUG_VOID_RETURN;
  }

  /*
    Subquery optimization: Conditions that are pushed down into subqueries
    are wrapped into Item_func_trig_cond. We process the wrapped condition
    but need to set cond_guard for Key_use elements generated from it.
  */
  {
    if (cond->type() == Item::FUNC_ITEM &&
        ((Item_func*)cond)->functype() == Item_func::TRIG_COND_FUNC)
    {
      Item *cond_arg= ((Item_func*)cond)->arguments()[0];
      /*
        The wrapped condition is only analyzed when this query block has
        neither GROUP BY nor ORDER BY, belongs to an IN subquery and is not
        part of a UNION. In every other case a trigger condition
        contributes no candidates at all.
      */
      if (!join->group_list && !join->order &&
          join->unit->item &&
          join->unit->item->substype() == Item_subselect::IN_SUBS &&
          !join->unit->is_union())
      {
        Key_field *save= *key_fields;
        add_key_fields(join, key_fields, and_level, cond_arg, usable_tables,
                       sargables);
        // Indicate that this ref access candidate is for subquery lookup:
        for (; save != *key_fields; save++)
          save->cond_guard= ((Item_func_trig_cond*)cond)->get_trig_var();
      }
      DBUG_VOID_RETURN;
    }
  }

  /* If item is of type 'field op field/constant' add it to key_fields */
  if (cond->type() != Item::FUNC_ITEM)
    DBUG_VOID_RETURN;
  Item_func *cond_func= (Item_func*) cond;
  // The function itself tells which kind of analysis applies to it.
  switch (cond_func->select_optimize()) {
  case Item_func::OPTIMIZE_NONE:
    break;
  case Item_func::OPTIMIZE_KEY:
  {
    Item **values;
    /*
      Build list of possible keys for 'a BETWEEN low AND high'.
      It is handled similar to the equivalent condition
      'a >= low AND a <= high':
    */
    if (cond_func->functype() == Item_func::BETWEEN)
    {
      Item_field *field_item;
      bool equal_func= FALSE;
      uint num_values= 2;
      values= cond_func->arguments();

      /*
        Argument for the eq() comparison of the two bounds below: taken
        from the field's binary() when the tested expression is a field,
        TRUE otherwise.
      */
      bool binary_cmp= (values[0]->real_item()->type() == Item::FIELD_ITEM)
            ? ((Item_field*)values[0]->real_item())->field->binary()
            : TRUE;

      /*
        Additional optimization: If 'low = high':
        Handle as if the condition was "t.key = low".
      */
      if (!((Item_func_between*)cond_func)->negated &&
          values[1]->eq(values[2], binary_cmp))
      {
        equal_func= TRUE;
        num_values= 1;
      }

      /*
        Append keys for 'field <cmp> value[]' if the
        condition is of the form::
        '<field> BETWEEN value[1] AND value[2]'
      */
      if (is_local_field (values[0]))
      {
        field_item= (Item_field *) (values[0]->real_item());
        add_key_equal_fields(key_fields, *and_level, cond_func,
                             field_item, equal_func, &values[1],
                             num_values, usable_tables, sargables);
      }
      /*
        Append keys for 'value[0] <cmp> field' if the
        condition is of the form:
        'value[0] BETWEEN field1 AND field2'
      */
      // Only values[1] is looked at when low and high were found equal.
      for (uint i= 1; i <= num_values; i++)
      {
        if (is_local_field (values[i]))
        {
          field_item= (Item_field *) (values[i]->real_item());
          add_key_equal_fields(key_fields, *and_level, cond_func,
                               field_item, equal_func, values,
                               1, usable_tables, sargables);
        }
      }
    } // if ( ... Item_func::BETWEEN)

    // The predicate is IN or !=
    else if (is_local_field (cond_func->key_item()) &&
            !(cond_func->used_tables() & OUTER_REF_TABLE_BIT))
    {
      values= cond_func->arguments()+1;
      /*
        For '!=' with a local field as second argument, step back so that
        the value list starts at arguments()[0].
      */
      if (cond_func->functype() == Item_func::NE_FUNC &&
        is_local_field (cond_func->arguments()[1]))
        values--;
      assert(cond_func->functype() != Item_func::IN_FUNC ||
             cond_func->argument_count() != 2);
      // eq_func is passed as 0 (false) for these predicates.
      add_key_equal_fields(key_fields, *and_level, cond_func,
                           (Item_field*) (cond_func->key_item()->real_item()),
                           0, values,
                           cond_func->argument_count()-1,
                           usable_tables, sargables);
    }
    else if (cond_func->functype() == Item_func::IN_FUNC &&
             cond_func->key_item()->type() == Item::ROW_ITEM)
    {
      /*
        The condition is (column1, column2, ... ) IN ((const1_1, const1_2), ...)
        and there is an index on (column1, column2, ...)

        The code below makes sure that the row constructor on the lhs indeed
        contains only column references before calling add_key_field on them.

        We can't do a ref access on IN, yet here we are. Why? We need
        to run add_key_field() only because it verifies that there are
        only constant expressions in the rows on the IN's rhs, see
        comment above the call to add_key_field() below.

        Actually, We could in theory do a ref access if the IN rhs
        contained just a single row, but there is a hack in the parser
        causing such IN predicates be parsed as row equalities.
      */
      Item_row *lhs_row= static_cast<Item_row*>(cond_func->key_item());
      if (is_row_of_local_columns(lhs_row))
      {
        for (uint i= 0; i < lhs_row->cols(); ++i)
        {
          Item *const lhs_item= lhs_row->element_index(i)->real_item();
          assert(lhs_item->type() == Item::FIELD_ITEM);
          Item_field *const lhs_column= static_cast<Item_field*>(lhs_item);
          // j goes from 1 since arguments()[0] is the lhs of IN.
          for (uint j= 1; j < cond_func->argument_count(); ++j)
          {
            // Here we pick out the i:th column in the j:th row.
            Item *rhs_item= cond_func->arguments()[j];
            assert(rhs_item->type() == Item::ROW_ITEM);
            Item_row *rhs_row= static_cast<Item_row*>(rhs_item);
            assert(rhs_row->cols() == lhs_row->cols());
            Item **rhs_expr_ptr= rhs_row->addr(i);
            /*
              add_key_field() will write a Key_field on each call
              here, but we don't care, it will never be used. We only
              call it for the side effect: update JOIN_TAB::const_keys
              so the range optimizer can be invoked. We pass a
              scrap buffer and pointer here.
            */
            Key_field scrap_key_field= **key_fields;
            Key_field *scrap_key_field_ptr= &scrap_key_field;
            add_key_field(&scrap_key_field_ptr,
                          *and_level,
                          cond_func,
                          lhs_column,
                          true, // eq_func
                          rhs_expr_ptr,
                          1, // Number of expressions: one
                          usable_tables,
                          NULL); // sargables
            // The pointer is not supposed to increase by more than one.
            assert(scrap_key_field_ptr <= &scrap_key_field + 1);
          }
        }
      }
    }
    break;
  }
  case Item_func::OPTIMIZE_OP:
  {
    // Only '=' and '<=>' count as equality functions here.
    bool equal_func=(cond_func->functype() == Item_func::EQ_FUNC ||
		     cond_func->functype() == Item_func::EQUAL_FUNC);

    // 'field <op> value': the second argument is the value.
    if (is_local_field (cond_func->arguments()[0]))
    {
      add_key_equal_fields(key_fields, *and_level, cond_func,
	                (Item_field*) (cond_func->arguments()[0])->real_item(),
		           equal_func,
                           cond_func->arguments()+1, 1, usable_tables,
                           sargables);
    }
    /*
      'value <op> field': the first argument is the value. This direction
      is not tried for LIKE.
    */
    if (is_local_field (cond_func->arguments()[1]) &&
	cond_func->functype() != Item_func::LIKE_FUNC)
    {
      add_key_equal_fields(key_fields, *and_level, cond_func,
                       (Item_field*) (cond_func->arguments()[1])->real_item(),
		           equal_func,
                           cond_func->arguments(),1,usable_tables,
                           sargables);
    }
    break;
  }
  case Item_func::OPTIMIZE_NULL:
    /* column_name IS [NOT] NULL */
    if (is_local_field (cond_func->arguments()[0]) &&
	!(cond_func->used_tables() & OUTER_REF_TABLE_BIT))
    {
      // A NULL item serves as the compared value; eq_func is set for IS NULL only.
      Item *tmp=new Item_null;
      if (unlikely(!tmp))                       // Should never be true
        DBUG_VOID_RETURN;
      add_key_equal_fields(key_fields, *and_level, cond_func,
		    (Item_field*) (cond_func->arguments()[0])->real_item(),
		    cond_func->functype() == Item_func::ISNULL_FUNC,
			   &tmp, 1, usable_tables, sargables);
    }
    break;
  case Item_func::OPTIMIZE_EQUAL:
    // In this case cond is a multiple equality (Item_equal).
    Item_equal *item_equal= (Item_equal *) cond;
    Item *const_item= item_equal->get_const();
    if (const_item)
    {
      /*
        For each field field1 from item_equal consider the equality
        field1=const_item as a condition allowing an index access of the table
        with field1 by the keys value of field1.
      */
      Item_equal_iterator it(*item_equal);
      Item_field *item;
      while ((item= it++))
      {
        add_key_field(key_fields, *and_level, cond_func, item,
                      TRUE, &const_item, 1, usable_tables, sargables);
      }
    }
    else
    {
      /*
        Consider all pairs of different fields included into item_equal.
        For each of them (field1, field2) consider the equality
        field1=field2 as a condition allowing an index access of the table
        with field1 by the keys value of field2.
      */
      Item_equal_iterator outer_it(*item_equal);
      Item_equal_iterator inner_it(*item_equal);
      Item_field *outer;
      while ((outer= outer_it++))
      {
        Item_field *inner;
        while ((inner= inner_it++))
        {
          if (!outer->field->eq(inner->field))
            add_key_field(key_fields, *and_level, cond_func, outer,
                          true, (Item **) &inner, 1, usable_tables,
                          sargables);
        }
        // Start the inner scan over for the next outer field.
        inner_it.rewind();
      }
    }
    break;
  }
  DBUG_VOID_RETURN;
}
7629 
7630 
7631 /*
7632   Add all keys with uses 'field' for some keypart
7633   If field->and_level != and_level then only mark key_part as const_part
7634 
7635   RETURN
7636    0 - OK
7637    1 - Out of memory.
7638 */
7639 
7640 static bool
add_key_part(Key_use_array * keyuse_array,Key_field * key_field)7641 add_key_part(Key_use_array *keyuse_array, Key_field *key_field)
7642 {
7643   if (key_field->eq_func && !(key_field->optimize & KEY_OPTIMIZE_EXISTS))
7644   {
7645     Field *const field= key_field->item_field->field;
7646     TABLE_LIST *const tl= key_field->item_field->table_ref;
7647     TABLE *const table= tl->table;
7648 
7649     for (uint key=0 ; key < table->s->keys ; key++)
7650     {
7651       if (!(table->keys_in_use_for_query.is_set(key)))
7652 	continue;
7653       if (table->key_info[key].flags & (HA_FULLTEXT | HA_SPATIAL))
7654 	continue;    // ToDo: ft-keys in non-ft queries.   SerG
7655 
7656       uint key_parts= actual_key_parts(&table->key_info[key]);
7657       for (uint part=0 ; part <  key_parts ; part++)
7658       {
7659 	if (field->eq(table->key_info[key].key_part[part].field))
7660 	{
7661           const Key_use keyuse(tl,
7662                                key_field->val,
7663                                key_field->val->used_tables(),
7664                                key,
7665                                part,
7666                                key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL,
7667                                (key_part_map) 1 << part,
7668                                ~(ha_rows) 0, // will be set in optimize_keyuse
7669                                key_field->null_rejecting,
7670                                key_field->cond_guard,
7671                                key_field->sj_pred_no);
7672           if (keyuse_array->push_back(keyuse))
7673             return true;              /* purecov: inspected */
7674 	}
7675       }
7676     }
7677   }
7678   return false;
7679 }
7680 
7681 
7682 /**
7683    Function parses WHERE condition and add key_use for FT index
7684    into key_use array if suitable MATCH function is found.
7685    Condition should be a set of AND expression, OR is not supported.
7686    MATCH function should be a part of simple expression.
7687    Simple expression is MATCH only function or MATCH is a part of
7688    comparison expression ('>=' or '>' operations are supported).
7689    It also sets FT_HINTS values(op_type, op_value).
7690 
7691    @param keyuse_array      Key_use array
7692    @param stat              JOIN_TAB structure
7693    @param cond              WHERE condition
7694    @param usable_tables     usable tables
7695    @param simple_match_expr true if this is the first call false otherwise.
7696                             if MATCH function is found at first call it means
7697                             that MATCH is simple expression, otherwise, in case
7698                             of AND/OR condition this parameter will be false.
7699 
7700    @retval
7701    true if FT key was added to Key_use array
7702    @retval
7703    false if no key was added to Key_use array
7704 
7705 */
7706 
7707 static bool
add_ft_keys(Key_use_array * keyuse_array,JOIN_TAB * stat,Item * cond,table_map usable_tables,bool simple_match_expr)7708 add_ft_keys(Key_use_array *keyuse_array,
7709             JOIN_TAB *stat,Item *cond,table_map usable_tables,
7710             bool simple_match_expr)
7711 {
7712   Item_func_match *cond_func=NULL;
7713 
7714   if (!cond)
7715     return FALSE;
7716 
7717   if (cond->type() == Item::FUNC_ITEM)
7718   {
7719     Item_func *func=(Item_func *)cond;
7720     Item_func::Functype functype=  func->functype();
7721     enum ft_operation op_type= FT_OP_NO;
7722     double op_value= 0.0;
7723     if (functype == Item_func::FT_FUNC)
7724     {
7725       cond_func= ((Item_func_match *) cond)->get_master();
7726       cond_func->set_hints_op(op_type, op_value);
7727     }
7728     else if (func->arg_count == 2)
7729     {
7730       Item *arg0=(func->arguments()[0]),
7731            *arg1=(func->arguments()[1]);
7732       if (arg1->const_item() &&
7733            arg0->type() == Item::FUNC_ITEM &&
7734            ((Item_func *) arg0)->functype() == Item_func::FT_FUNC &&
7735           ((functype == Item_func::GE_FUNC &&
7736             (op_value= arg1->val_real()) > 0) ||
7737            (functype == Item_func::GT_FUNC &&
7738             (op_value= arg1->val_real()) >=0)))
7739       {
7740         cond_func= ((Item_func_match *) arg0)->get_master();
7741         if (functype == Item_func::GE_FUNC)
7742           op_type= FT_OP_GE;
7743         else if (functype == Item_func::GT_FUNC)
7744           op_type= FT_OP_GT;
7745         cond_func->set_hints_op(op_type, op_value);
7746       }
7747       else if (arg0->const_item() &&
7748                 arg1->type() == Item::FUNC_ITEM &&
7749                 ((Item_func *) arg1)->functype() == Item_func::FT_FUNC &&
7750                ((functype == Item_func::LE_FUNC &&
7751                  (op_value= arg0->val_real()) > 0) ||
7752                 (functype == Item_func::LT_FUNC &&
7753                  (op_value= arg0->val_real()) >=0)))
7754       {
7755         cond_func= ((Item_func_match *) arg1)->get_master();
7756         if (functype == Item_func::LE_FUNC)
7757           op_type= FT_OP_GE;
7758         else if (functype == Item_func::LT_FUNC)
7759           op_type= FT_OP_GT;
7760         cond_func->set_hints_op(op_type, op_value);
7761       }
7762     }
7763   }
7764   else if (cond->type() == Item::COND_ITEM)
7765   {
7766     List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
7767 
7768     if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
7769     {
7770       Item *item;
7771       while ((item=li++))
7772       {
7773         if (add_ft_keys(keyuse_array, stat, item, usable_tables, false))
7774           return TRUE;
7775       }
7776     }
7777   }
7778 
7779   if (!cond_func || cond_func->key == NO_SUCH_KEY ||
7780       !(usable_tables & cond_func->table_ref->map()))
7781     return FALSE;
7782 
7783   cond_func->set_simple_expression(simple_match_expr);
7784 
7785   const Key_use keyuse(cond_func->table_ref,
7786                        cond_func,
7787                        cond_func->key_item()->used_tables(),
7788                        cond_func->key,
7789                        FT_KEYPART,
7790                        0,             // optimize
7791                        0,             // keypart_map
7792                        ~(ha_rows)0,   // ref_table_rows
7793                        false,         // null_rejecting
7794                        NULL,          // cond_guard
7795                        UINT_MAX);     // sj_pred_no
7796   return keyuse_array->push_back(keyuse);
7797 }
7798 
7799 
7800 /**
7801   Compares two keyuse elements.
7802 
7803   @param a first Key_use element
7804   @param b second Key_use element
7805 
7806   Compare Key_use elements so that they are sorted as follows:
7807     -# By table.
7808     -# By key for each table.
7809     -# By keypart for each key.
7810     -# Const values.
7811     -# Ref_or_null.
7812 
7813   @retval  0 If a = b.
7814   @retval <0 If a < b.
7815   @retval >0 If a > b.
7816 */
sort_keyuse(Key_use * a,Key_use * b)7817 static int sort_keyuse(Key_use *a, Key_use *b)
7818 {
7819   int res;
7820   if (a->table_ref->tableno() != b->table_ref->tableno())
7821     return (int) (a->table_ref->tableno() - b->table_ref->tableno());
7822   if (a->key != b->key)
7823     return (int) (a->key - b->key);
7824   if (a->keypart != b->keypart)
7825     return (int) (a->keypart - b->keypart);
7826   // Place const values before other ones
7827   if ((res= MY_TEST((a->used_tables & ~OUTER_REF_TABLE_BIT)) -
7828        MY_TEST((b->used_tables & ~OUTER_REF_TABLE_BIT))))
7829     return res;
7830   /* Place rows that are not 'OPTIMIZE_REF_OR_NULL' first */
7831   return (int) ((a->optimize & KEY_OPTIMIZE_REF_OR_NULL) -
7832 		(b->optimize & KEY_OPTIMIZE_REF_OR_NULL));
7833 }
7834 
7835 
7836 /*
7837   Add to Key_field array all 'ref' access candidates within nested join.
7838 
7839     This function populates Key_field array with entries generated from the
7840     ON condition of the given nested join, and does the same for nested joins
7841     contained within this nested join.
7842 
7843   @param[in]      nested_join_table   Nested join pseudo-table to process
7844   @param[in,out]  end                 End of the key field array
7845   @param[in,out]  and_level           And-level
7846   @param[in,out]  sargables           Array of found sargable candidates
7847 
7848 
7849   @note
7850     We can add accesses to the tables that are direct children of this nested
7851     join (1), and are not inner tables w.r.t their neighbours (2).
7852 
7853     Example for #1 (outer brackets pair denotes nested join this function is
7854     invoked for):
7855     @code
7856      ... LEFT JOIN (t1 LEFT JOIN (t2 ... ) ) ON cond
7857     @endcode
7858     Example for #2:
7859     @code
7860      ... LEFT JOIN (t1 LEFT JOIN t2 ) ON cond
7861     @endcode
7862     In examples 1-2 for condition cond, we can add 'ref' access candidates to
7863     t1 only.
7864     Example #3:
7865     @code
7866      ... LEFT JOIN (t1, t2 LEFT JOIN t3 ON inner_cond) ON cond
7867     @endcode
7868     Here we can add 'ref' access candidates for t1 and t2, but not for t3.
7869 */
7870 
add_key_fields_for_nj(JOIN * join,TABLE_LIST * nested_join_table,Key_field ** end,uint * and_level,SARGABLE_PARAM ** sargables)7871 static void add_key_fields_for_nj(JOIN *join, TABLE_LIST *nested_join_table,
7872                                   Key_field **end, uint *and_level,
7873                                   SARGABLE_PARAM **sargables)
7874 {
7875   List_iterator<TABLE_LIST> li(nested_join_table->nested_join->join_list);
7876   List_iterator<TABLE_LIST> li2(nested_join_table->nested_join->join_list);
7877   bool have_another = FALSE;
7878   table_map tables= 0;
7879   TABLE_LIST *table;
7880   assert(nested_join_table->nested_join);
7881 
7882   while ((table= li++) || (have_another && (li=li2, have_another=FALSE,
7883                                             (table= li++))))
7884   {
7885     if (table->nested_join)
7886     {
7887       if (!table->join_cond_optim())
7888       {
7889         /* It's a semi-join nest. Walk into it as if it wasn't a nest */
7890         have_another= TRUE;
7891         li2= li;
7892         li= List_iterator<TABLE_LIST>(table->nested_join->join_list);
7893       }
7894       else
7895         add_key_fields_for_nj(join, table, end, and_level, sargables);
7896     }
7897     else
7898       if (!table->join_cond_optim())
7899         tables|= table->map();
7900   }
7901   if (nested_join_table->join_cond_optim())
7902     add_key_fields(join, end, and_level, nested_join_table->join_cond_optim(),
7903                    tables, sargables);
7904 }
7905 
7906 
7907 ///  @} (end of group RefOptimizerModule)
7908 
7909 
7910 /**
7911   Check for the presence of AGGFN(DISTINCT a) queries that may be subject
7912   to loose index scan.
7913 
7914 
7915   Check if the query is a subject to AGGFN(DISTINCT) using loose index scan
7916   (QUICK_GROUP_MIN_MAX_SELECT).
7917   Optionally (if out_args is supplied) will push the arguments of
7918   AGGFN(DISTINCT) to the list
7919 
7920   Check for every COUNT(DISTINCT), AVG(DISTINCT) or
7921   SUM(DISTINCT). These can be resolved by Loose Index Scan as long
7922   as all the aggregate distinct functions refer to the same
7923   fields. Thus:
7924 
7925   SELECT AGGFN(DISTINCT a, b), AGGFN(DISTINCT b, a)... => can use LIS
7926   SELECT AGGFN(DISTINCT a),    AGGFN(DISTINCT a)   ... => can use LIS
7927   SELECT AGGFN(DISTINCT a, b), AGGFN(DISTINCT a)   ... => cannot use LIS
7928   SELECT AGGFN(DISTINCT a),    AGGFN(DISTINCT b)   ... => cannot use LIS
7929   etc.
7930 
7931   @param      join       the join to check
7932   @param[out] out_args   Collect the arguments of the aggregate functions
7933                          to a list. We don't worry about duplicates as
7934                          these will be sorted out later in
7935                          get_best_group_min_max.
7936 
7937   @return                does the query qualify for indexed AGGFN(DISTINCT)
7938     @retval   true       it does
7939     @retval   false      AGGFN(DISTINCT) must apply distinct in it.
7940 */
7941 
7942 bool
is_indexed_agg_distinct(JOIN * join,List<Item_field> * out_args)7943 is_indexed_agg_distinct(JOIN *join, List<Item_field> *out_args)
7944 {
7945   Item_sum **sum_item_ptr;
7946   bool result= false;
7947   Field_map first_aggdistinct_fields;
7948 
7949   if (join->primary_tables > 1 ||             /* reference more than 1 table */
7950       join->select_distinct ||                /* or a DISTINCT */
7951       join->select_lex->olap == ROLLUP_TYPE)  /* Check (B3) for ROLLUP */
7952     return false;
7953 
7954   if (join->make_sum_func_list(join->all_fields, join->fields_list, true))
7955     return false;
7956 
7957   for (sum_item_ptr= join->sum_funcs; *sum_item_ptr; sum_item_ptr++)
7958   {
7959     Item_sum *sum_item= *sum_item_ptr;
7960     Field_map cur_aggdistinct_fields;
7961     Item *expr;
7962     /* aggregate is not AGGFN(DISTINCT) or more than 1 argument to it */
7963     switch (sum_item->sum_func())
7964     {
7965       case Item_sum::MIN_FUNC:
7966       case Item_sum::MAX_FUNC:
7967         continue;
7968       case Item_sum::COUNT_DISTINCT_FUNC:
7969         break;
7970       case Item_sum::AVG_DISTINCT_FUNC:
7971       case Item_sum::SUM_DISTINCT_FUNC:
7972         if (sum_item->get_arg_count() == 1)
7973           break;
7974         /* fall through */
7975       default: return false;
7976     }
7977 
7978     for (uint i= 0; i < sum_item->get_arg_count(); i++)
7979     {
7980       expr= sum_item->get_arg(i);
7981       /* The AGGFN(DISTINCT) arg is not an attribute? */
7982       if (expr->real_item()->type() != Item::FIELD_ITEM)
7983         return false;
7984 
7985       Item_field* item= static_cast<Item_field*>(expr->real_item());
7986       if (out_args)
7987         out_args->push_back(item);
7988 
7989       cur_aggdistinct_fields.set_bit(item->field->field_index);
7990       result= true;
7991     }
7992     /*
7993       If there are multiple aggregate functions, make sure that they all
7994       refer to exactly the same set of columns.
7995     */
7996     if (first_aggdistinct_fields.is_clear_all())
7997       first_aggdistinct_fields.merge(cur_aggdistinct_fields);
7998     else if (first_aggdistinct_fields != cur_aggdistinct_fields)
7999       return false;
8000   }
8001 
8002   return result;
8003 }
8004 
8005 
8006 /**
8007   Print keys that were appended to join_tab->const_keys because they
8008   can be used for GROUP BY or DISTINCT to the optimizer trace.
8009 
8010   @param trace     The optimizer trace context we're adding info to
8011   @param join_tab  The table the indexes cover
8012   @param new_keys  The keys that are considered useful because they can
8013                    be used for GROUP BY or DISTINCT
8014   @param cause     Zero-terminated string with reason for adding indexes
8015                    to const_keys
8016 
8017   @see add_group_and_distinct_keys()
8018  */
trace_indexes_added_group_distinct(Opt_trace_context * trace,const JOIN_TAB * join_tab,const key_map new_keys,const char * cause)8019 static void trace_indexes_added_group_distinct(Opt_trace_context *trace,
8020                                                const JOIN_TAB *join_tab,
8021                                                const key_map new_keys,
8022                                                const char* cause)
8023 {
8024 #ifdef OPTIMIZER_TRACE
8025   if (likely(!trace->is_started()))
8026     return;
8027 
8028   KEY *key_info= join_tab->table()->key_info;
8029   key_map existing_keys= join_tab->const_keys;
8030   uint nbrkeys= join_tab->table()->s->keys;
8031 
8032   Opt_trace_object trace_summary(trace, "const_keys_added");
8033   {
8034     Opt_trace_array trace_key(trace,"keys");
8035     for (uint j= 0 ; j < nbrkeys ; j++)
8036       if (new_keys.is_set(j) && !existing_keys.is_set(j))
8037         trace_key.add_utf8(key_info[j].name);
8038   }
8039   trace_summary.add_alnum("cause", cause);
8040 #endif
8041 }
8042 
8043 
8044 /**
8045   Discover the indexes that might be used for GROUP BY or DISTINCT queries.
8046 
8047   If the query has a GROUP BY clause, find all indexes that contain
8048   all GROUP BY fields, and add those indexes to join_tab->const_keys
8049   and join_tab->keys.
8050 
8051   If the query has a DISTINCT clause, find all indexes that contain
8052   all SELECT fields, and add those indexes to join_tab->const_keys and
8053   join_tab->keys. This allows later on such queries to be processed by
8054   a QUICK_GROUP_MIN_MAX_SELECT.
8055 
8056   Note that indexes that are not usable for resolving GROUP
8057   BY/DISTINCT may also be added in some corner cases. For example, an
8058   index covering 'a' and 'b' is not usable for the following query but
8059   is still added: "SELECT DISTINCT a+b FROM t1". This is not a big
8060   issue because a) although the optimizer will consider using the
8061   index, it will not chose it (so minor calculation cost added but not
8062   wrong result) and b) it applies only to corner cases.
8063 
8064   @param join
8065   @param join_tab
8066 
8067   @return
8068     None
8069 */
8070 
8071 static void
add_group_and_distinct_keys(JOIN * join,JOIN_TAB * join_tab)8072 add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab)
8073 {
8074   assert(join_tab->const_keys.is_subset(join_tab->keys()));
8075 
8076   List<Item_field> indexed_fields;
8077   List_iterator<Item_field> indexed_fields_it(indexed_fields);
8078   ORDER      *cur_group;
8079   Item_field *cur_item;
8080   const char *cause;
8081 
8082   if (join->group_list)
8083   { /* Collect all query fields referenced in the GROUP clause. */
8084     for (cur_group= join->group_list; cur_group; cur_group= cur_group->next)
8085       (*cur_group->item)->walk(&Item::collect_item_field_processor,
8086                                Item::WALK_POSTFIX,
8087                                (uchar*) &indexed_fields);
8088     cause= "group_by";
8089   }
8090   else if (join->select_distinct)
8091   { /* Collect all query fields referenced in the SELECT clause. */
8092     List<Item> &select_items= join->fields_list;
8093     List_iterator<Item> select_items_it(select_items);
8094     Item *item;
8095     while ((item= select_items_it++))
8096       item->walk(&Item::collect_item_field_processor,
8097                  Item::WALK_POSTFIX,
8098                  (uchar*) &indexed_fields);
8099     cause= "distinct";
8100   }
8101   else if (join->tmp_table_param.sum_func_count &&
8102            is_indexed_agg_distinct(join, &indexed_fields))
8103   {
8104     /*
8105       SELECT list with AGGFN(distinct col). The query qualifies for
8106       loose index scan, and is_indexed_agg_distinct() has already
8107       collected all referenced fields into indexed_fields.
8108     */
8109     join->sort_and_group= 1;
8110     cause= "indexed_distinct_aggregate";
8111   }
8112   else
8113     return;
8114 
8115   if (indexed_fields.elements == 0)
8116     return;
8117 
8118   key_map possible_keys;
8119   possible_keys.set_all();
8120 
8121   /* Intersect the keys of all group fields. */
8122   while ((cur_item= indexed_fields_it++))
8123   {
8124     if (cur_item->used_tables() != join_tab->table_ref->map())
8125     {
8126       /*
8127         Doing GROUP BY or DISTINCT on a field in another table so no
8128         index in this table is usable
8129       */
8130       return;
8131     }
8132     else
8133       possible_keys.intersect(cur_item->field->part_of_key);
8134   }
8135 
8136   /*
8137     At this point, possible_keys has key bits set only for usable
8138     indexes because indexed_fields is non-empty and if any of the
8139     fields belong to a different table the function would exit in the
8140     loop above.
8141   */
8142 
8143   if (!possible_keys.is_clear_all() &&
8144       !possible_keys.is_subset(join_tab->const_keys))
8145   {
8146     trace_indexes_added_group_distinct(&join->thd->opt_trace, join_tab,
8147                                        possible_keys, cause);
8148     join_tab->const_keys.merge(possible_keys);
8149     join_tab->keys().merge(possible_keys);
8150   }
8151 
8152   assert(join_tab->const_keys.is_subset(join_tab->keys()));
8153 }
8154 
/**
  Update keyuse array with all possible keys we can use to fetch rows.

  The function collects Key_field candidates from the WHERE condition,
  from the join condition of every table in join_tab and from the nested
  joins in select_lex->top_join_list, turns them into Key_use entries,
  optionally adds full-text keys, and finally sorts and compacts the
  keyuse array in place.

  @param       thd            Thread handle; used to allocate the scratch
                              buffer and to reach the optimizer trace
  @param[out]  keyuse         Put here ordered array of Key_use structures
  @param       join_tab       Array in table number order
  @param       tables         Number of tables in join
  @param       cond           WHERE condition (note that the function analyzes
                              join_tab[i]->join_cond() too)
  @param       cond_equal     Not referenced in the body of this function
  @param       normal_tables  Tables not inner w.r.t some outer join (ones
                              for which we can make ref access based the WHERE
                              clause)
  @param       select_lex     current SELECT
  @param[out]  sargables      Array of found sargable candidates

   @retval
     false  OK
   @retval
     true   Out of memory, or one of the called helpers reported failure.
*/

static bool
update_ref_and_keys(THD *thd, Key_use_array *keyuse,JOIN_TAB *join_tab,
                    uint tables, Item *cond, COND_EQUAL *cond_equal,
                    table_map normal_tables, SELECT_LEX *select_lex,
                    SARGABLE_PARAM **sargables)
{
  uint	and_level,i,found_eq_constant;
  Key_field *key_fields, *end, *field;
  size_t sz;
  /* Multiplier for the buffer size below; never less than 1. */
  uint m= max(select_lex->max_equal_elems, 1U);
  JOIN *const join= select_lex->join;
  /*
    We use the same piece of memory to store both Key_field
    and SARGABLE_PARAM structures.
    Key_field values are placed at the beginning of this memory
    while SARGABLE_PARAM values are put at the end.
    All predicates that are used to fill arrays of Key_field
    and SARGABLE_PARAM structures have at most 2 arguments
    except BETWEEN predicates that have 3 arguments and
    IN predicates.
    Thus any predicate that is not BETWEEN/IN can be used
    directly to fill at most 2 array elements, either of Key_field
    or SARGABLE_PARAM type. For a BETWEEN predicate 3 elements
    can be filled as this predicate is considered as
    sargable with respect to each of its arguments.
    An IN predicate can require at most 1 element as currently
    it is considered as sargable only for its first argument.
    Multiple equality can add elements that are filled after
    substitution of field arguments by equal fields. There
    can be not more than select_lex->max_equal_elems such
    substitutions.
  */
  sz= max(sizeof(Key_field), sizeof(SARGABLE_PARAM)) *
    (((select_lex->cond_count + 1) * 2 +
      select_lex->between_count) * m + 1);
  if (!(key_fields=(Key_field*)	thd->alloc(sz)))
    return TRUE; /* purecov: inspected */
  and_level= 0;
  /* 'field' marks the first Key_field; 'end' is passed to the collectors. */
  field= end= key_fields;
  /*
    Point *sargables at a slot near the end of the buffer. The offset is
    computed so that at least the 'field' member of that slot fits.
  */
  *sargables= (SARGABLE_PARAM *) key_fields +
    (sz - sizeof((*sargables)[0].field))/sizeof(SARGABLE_PARAM);
  /* set a barrier for the array of SARGABLE_PARAM */
  (*sargables)[0].field= 0;

  if (cond)
  {
    add_key_fields(join, &end, &and_level, cond, normal_tables, sargables);
    /* Inspect the Key_field entries produced from the WHERE condition. */
    for (Key_field *fld= field; fld != end ; fld++)
    {
      /* Mark that we can optimize LEFT JOIN */
      if (fld->val->type() == Item::NULL_ITEM &&
          !fld->item_field->field->real_maybe_null())
      {
        /*
          Example:
          SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.a WHERE t2.a IS NULL;
          this just wants rows of t1 where t1.a does not exist in t2.
        */
        fld->item_field->field->table->reginfo.not_exists_optimize= true;
      }
    }
  }

  for (i=0 ; i < tables ; i++)
  {
    /*
      Block the creation of keys for inner tables of outer joins.
      Here only the outer joins that can not be converted to
      inner joins are left and all nests that can be eliminated
      are flattened.
      In the future when we introduce conditional accesses
      for inner tables in outer joins these keys will be taken
      into account as well.
    */
    if (join_tab[i].join_cond())
      add_key_fields(join, &end, &and_level,
                     join_tab[i].join_cond(),
                     join_tab[i].table_ref->map(), sargables);
  }

  /* Process ON conditions for the nested joins */
  {
    List_iterator<TABLE_LIST> li(select_lex->top_join_list);
    TABLE_LIST *tl;
    while ((tl= li++))
    {
      if (tl->nested_join)
        add_key_fields_for_nj(join, tl, &end, &and_level, sargables);
    }
  }

  /* Generate keys descriptions for derived tables */
  if (select_lex->materialized_derived_table_count)
  {
    if (join->generate_derived_keys())
      return true;
  }
  /* fill keyuse with found key parts */
  for ( ; field != end ; field++)
  {
    if (add_key_part(keyuse,field))
      return true;
  }

  /* Full-text functions are present: add full-text keys as well. */
  if (select_lex->ftfunc_list->elements)
  {
    if (add_ft_keys(keyuse, join_tab, cond, normal_tables, true))
      return true;
  }

  /*
    Sort the array of possible keys and remove the following key parts:
    - ref if there is a keypart which is a ref and a const.
      (e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d,
      then we skip the key part corresponding to b=t2.d)
    - keyparts without previous keyparts
      (e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is
      used in the query, we drop the partial key parts from consideration).
    Special treatment for ft-keys.
  */
  if (!keyuse->empty())
  {
    /* 'use' reads every entry; 'save_pos' is where the next kept one goes. */
    Key_use *save_pos, *use;

    my_qsort(keyuse->begin(), keyuse->size(), keyuse->element_size(),
             reinterpret_cast<qsort_cmp>(sort_keyuse));

    /* Sentinel entry appended after the real ones. */
    const Key_use key_end(NULL, NULL, 0, 0, 0, 0, 0, 0, false, NULL, 0);
    if (keyuse->push_back(key_end)) // added for easy testing
      return TRUE;

    /* Pointers are taken after push_back(), i.e. after any reallocation. */
    use= save_pos= keyuse->begin();
    /*
      Start with the sentinel as "previous" entry.
      NOTE(review): presumably its NULL table_ref never equals that of a
      real entry, so the first entry takes the "keypart must be 0" branch
      - confirm against the Key_use constructor.
    */
    const Key_use *prev= &key_end;
    found_eq_constant=0;
    /* size()-1: the sentinel itself is not examined. */
    for (i=0 ; i < keyuse->size()-1 ; i++,use++)
    {
      TABLE *const table= use->table_ref->table;
      /*
        The value depends on no table and this is not a ref-or-null
        access: record the key part in const_key_parts. This is done even
        for entries that are dropped below.
      */
      if (!use->used_tables && use->optimize != KEY_OPTIMIZE_REF_OR_NULL)
        table->const_key_parts[use->key]|= use->keypart_map;
      if (use->keypart != FT_KEYPART)
      {
        if (use->key == prev->key && use->table_ref == prev->table_ref)
        {
          /*
            Same key as the last kept entry: drop this one if it leaves a
            gap in the key parts, or if it repeats a key part for which a
            table-independent value was already kept.
          */
          if (prev->keypart+1 < use->keypart ||
              (prev->keypart == use->keypart && found_eq_constant))
            continue; /* remove */
        }
        else if (use->keypart != 0) // First found must be 0
          continue;
      }

#if defined(__GNUC__) && !MY_GNUC_PREREQ(4,4)
      /*
        Old gcc used a memcpy(), which is undefined if save_pos==use:
        http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19410
        http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39480
      */
      if (save_pos != use)
#endif
        *save_pos= *use;
      /* 'prev' keeps pointing at the source slot of the last kept entry. */
      prev=use;
      found_eq_constant= !use->used_tables;
      /* Save ptr to first use */
      if (!table->reginfo.join_tab->keyuse())
        table->reginfo.join_tab->set_keyuse(save_pos);
      table->reginfo.join_tab->checked_keys.set_bit(use->key);
      save_pos++;
    }
    /* i becomes the number of entries that were kept. */
    i= (uint) (save_pos - keyuse->begin());
    /*
      Re-write the sentinel right after the kept entries, then shrink.
      NOTE(review): presumably chop(i) reduces the size to i so that the
      sentinel sits just past the last counted element - confirm against
      Key_use_array::chop().
    */
    keyuse->at(i) = key_end;
    keyuse->chop(i);
  }
  print_keyuse_array(&thd->opt_trace, keyuse);

  return false;
}
8352 
8353 
8354 /**
8355   Create a keyuse array for a table with a primary key.
8356   To be used when creating a materialized temporary table.
8357 
8358   @param thd         THD pointer, for memory allocation
8359   @param table       Table object representing table
8360   @param keyparts    Number of key parts in the primary key
8361   @param outer_exprs List of items used for key lookup
8362 
8363   @return Pointer to created keyuse array, or NULL if error
8364 */
create_keyuse_for_table(THD * thd,TABLE * table,uint keyparts,Item_field ** fields,List<Item> outer_exprs)8365 Key_use_array *create_keyuse_for_table(THD *thd, TABLE *table, uint keyparts,
8366                                        Item_field **fields,
8367                                        List<Item> outer_exprs)
8368 {
8369   void *mem= thd->alloc(sizeof(Key_use_array));
8370   if (!mem)
8371     return NULL;
8372   Key_use_array *keyuses= new (mem) Key_use_array(thd->mem_root);
8373 
8374   List_iterator<Item> outer_expr(outer_exprs);
8375 
8376   for (uint keypartno= 0; keypartno < keyparts; keypartno++)
8377   {
8378     Item *const item= outer_expr++;
8379     Key_field key_field(fields[keypartno], item, 0, 0, true,
8380                         // null_rejecting must be true for field items only,
8381                         // add_not_null_conds() is incapable of handling
8382                         // other item types.
8383                         (item->type() == Item::FIELD_ITEM),
8384                         NULL, UINT_MAX);
8385     if (add_key_part(keyuses, &key_field))
8386       return NULL;
8387   }
8388   const Key_use key_end(NULL, NULL, 0, 0, 0, 0, 0, 0, false, NULL, 0);
8389   if (keyuses->push_back(key_end)) // added for easy testing
8390     return NULL;
8391 
8392   return keyuses;
8393 }
8394 
8395 
8396 /**
8397   Move const tables first in the position array.
8398 
8399   Increment the number of const tables and set same basic properties for the
8400   const table.
8401   A const table looked up by a key has type JT_CONST.
8402   A const table with a single row has type JT_SYSTEM.
8403 
8404   @param tab    Table that is designated as a const table
8405   @param key    The key definition to use for this table (NULL if table scan)
8406 */
8407 
mark_const_table(JOIN_TAB * tab,Key_use * key)8408 void JOIN::mark_const_table(JOIN_TAB *tab, Key_use *key)
8409 {
8410   POSITION *const position= positions + const_tables;
8411   position->table= tab;
8412   position->key= key;
8413   position->rows_fetched= 1.0;               // This is a const table
8414   position->filter_effect= 1.0;
8415   position->prefix_rowcount= 1.0;
8416   position->read_cost= 0.0;
8417   position->ref_depend_map= 0;
8418   position->loosescan_key= MAX_KEY;    // Not a LooseScan
8419   position->sj_strategy= SJ_OPT_NONE;
8420   positions->use_join_buffer= false;
8421 
8422   // Move the const table as far down as possible in best_ref
8423   JOIN_TAB **pos= best_ref + const_tables + 1;
8424   for (JOIN_TAB *next= best_ref[const_tables]; next != tab; pos++)
8425   {
8426     JOIN_TAB *const tmp= pos[0];
8427     pos[0]= next;
8428     next= tmp;
8429   }
8430   best_ref[const_tables]= tab;
8431 
8432   tab->set_type(key ? JT_CONST : JT_SYSTEM);
8433 
8434   const_table_map|= tab->table_ref->map();
8435 
8436   const_tables++;
8437 }
8438 
8439 
make_outerjoin_info()8440 void JOIN::make_outerjoin_info()
8441 {
8442   DBUG_ENTER("JOIN::make_outerjoin_info");
8443 
8444   assert(select_lex->outer_join);
8445   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
8446 
8447   select_lex->reset_nj_counters();
8448 
8449   for (uint i= const_tables; i < tables; ++i)
8450   {
8451     JOIN_TAB *const tab= best_ref[i];
8452     TABLE *const table= tab->table();
8453     if (!table)
8454       continue;
8455 
8456     TABLE_LIST *const tbl= tab->table_ref;
8457 
8458     if (tbl->outer_join)
8459     {
8460       /*
8461         Table tab is the only one inner table for outer join.
8462         (Like table t4 for the table reference t3 LEFT JOIN t4 ON t3.a=t4.a
8463         is in the query above.)
8464       */
8465       tab->set_last_inner(i);
8466       tab->set_first_inner(i);
8467       tab->init_join_cond_ref(tbl);
8468       tab->cond_equal= tbl->cond_equal;
8469       /*
8470         If this outer join nest is embedded in another join nest,
8471         link the join-tabs:
8472       */
8473       TABLE_LIST *const outer_join_nest= tbl->outer_join_nest();
8474       if (outer_join_nest)
8475         tab->set_first_upper(outer_join_nest->nested_join->first_nested);
8476     }
8477     for (TABLE_LIST *embedding= tbl->embedding;
8478          embedding;
8479          embedding= embedding->embedding)
8480     {
8481       // Ignore join nests that are not outer join nests:
8482       if (!embedding->join_cond_optim())
8483         continue;
8484       NESTED_JOIN *const nested_join= embedding->nested_join;
8485       if (!nested_join->nj_counter)
8486       {
8487         /*
8488           Table tab is the first inner table for nested_join.
8489           Save reference to it in the nested join structure.
8490         */
8491         nested_join->first_nested= i;
8492         tab->init_join_cond_ref(embedding);
8493         tab->cond_equal= tbl->cond_equal;
8494 
8495         TABLE_LIST *const outer_join_nest= embedding->outer_join_nest();
8496         if (outer_join_nest)
8497           tab->set_first_upper(outer_join_nest->nested_join->first_nested);
8498       }
8499       if (tab->first_inner() == NO_PLAN_IDX)
8500         tab->set_first_inner(nested_join->first_nested);
8501       if (++nested_join->nj_counter < nested_join->nj_total)
8502         break;
8503       // Table tab is the last inner table for nested join.
8504       best_ref[nested_join->first_nested]->set_last_inner(i);
8505     }
8506   }
8507   DBUG_VOID_RETURN;
8508 }
8509 
8510 /**
8511   Build a condition guarded by match variables for embedded outer joins.
8512   When generating a condition for a table as part of an outer join condition
8513   or the WHERE condition, the table in question may also be part of an
8514   embedded outer join. In such cases, the condition must be guarded by
8515   the match variable for this embedded outer join. Such embedded outer joins
8516   may also be recursively embedded in other joins.
8517 
8518   The function recursively adds guards for a condition ascending from tab
8519   to root_tab, which is the first inner table of an outer join,
8520   or NULL if the condition being handled is the WHERE clause.
8521 
8522   @param idx       index of the first inner table for the inner-most outer join
8523   @param cond      the predicate to be guarded (must be set)
8524   @param root_idx  index of the inner table to stop at
8525                    (is NO_PLAN_IDX if this is the WHERE clause)
8526 
8527   @return
8528     -  pointer to the guarded predicate, if success
8529     -  NULL if error
8530 */
8531 
8532 static Item*
add_found_match_trig_cond(JOIN * join,plan_idx idx,Item * cond,plan_idx root_idx)8533 add_found_match_trig_cond(JOIN *join, plan_idx idx, Item *cond,
8534                           plan_idx root_idx)
8535 {
8536   ASSERT_BEST_REF_IN_JOIN_ORDER(join);
8537   assert(cond);
8538 
8539   for ( ; idx != root_idx; idx= join->best_ref[idx]->first_upper())
8540   {
8541     if (!(cond= new Item_func_trig_cond(cond, NULL, join, idx,
8542                                         Item_func_trig_cond::FOUND_MATCH)))
8543       return NULL;
8544 
8545     cond->quick_fix_field();
8546     cond->update_used_tables();
8547   }
8548 
8549   return cond;
8550 }
8551 
8552 
8553 /**
8554   Attach outer join conditions to generated table conditions in an optimal way.
8555 
8556   @param last_tab - Last table that has been added to the current plan.
8557                     Pre-condition: If this is the last inner table of an outer
8558                     join operation, a join condition is attached to the first
8559                     inner table of that outer join operation.
8560 
8561   @return false if success, true if error.
8562 
8563   Outer join conditions are attached to individual tables, but we can analyze
8564   those conditions only when reaching the last inner table of an outer join
8565   operation. Notice also that a table can be last within several outer join
8566   nests, hence the outer for() loop of this function.
8567 
8568   Example:
8569     SELECT * FROM t1 LEFT JOIN (t2 LEFT JOIN t3 ON t2.a=t3.a) ON t1.a=t2.a
8570 
8571     Table t3 is last both in the join nest (t2 - t3) and in (t1 - (t2 - t3))
8572     Thus, join conditions for both join nests will be evaluated when reaching
8573     this table.
8574 
8575   For each outer join operation processed, the join condition is split
8576   optimally over the inner tables of the outer join. The split-out conditions
8577   are later referred to as table conditions (but note that several table
8578   conditions stemming from different join operations may be combined into
8579   a composite table condition).
8580 
8581   Example:
8582     Consider the above query once more.
8583     The predicate t1.a=t2.a can be evaluated when rows from t1 and t2 are ready,
8584     ie at table t2. The predicate t2.a=t3.a can be evaluated at table t3.
8585 
8586   Each non-constant split-out table condition is guarded by a match variable
8587   that enables it only when a matching row is found for all the embedded
8588   outer join operations.
8589 
8590   Each split-out table condition is guarded by a variable that turns the
8591   condition off just before a null-complemented row for the outer join
8592   operation is formed. Thus, the join condition will not be checked for
8593   the null-complemented row.
8594 */
8595 
attach_join_conditions(plan_idx last_tab)8596 bool JOIN::attach_join_conditions(plan_idx last_tab)
8597 {
8598   DBUG_ENTER("JOIN::attach_join_conditions");
8599   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
8600 
8601   for (plan_idx first_inner= best_ref[last_tab]->first_inner();
8602        first_inner != NO_PLAN_IDX &&
8603          best_ref[first_inner]->last_inner() == last_tab;
8604        first_inner= best_ref[first_inner]->first_upper())
8605   {
8606     /*
8607       Table last_tab is the last inner table of an outer join, locate
8608       the corresponding join condition from the first inner table of the
8609       same outer join:
8610     */
8611     Item *const join_cond= best_ref[first_inner]->join_cond();
8612     assert(join_cond);
8613     /*
8614       Add the constant part of the join condition to the first inner table
8615       of the outer join.
8616     */
8617     Item *cond= make_cond_for_table(join_cond, const_table_map,
8618                                     (table_map) 0, false);
8619     if (cond)
8620     {
8621       cond= new Item_func_trig_cond(cond, NULL, this, first_inner,
8622                                     Item_func_trig_cond::IS_NOT_NULL_COMPL);
8623       if (!cond)
8624         DBUG_RETURN(true);
8625       if (cond->fix_fields(thd, NULL))
8626         DBUG_RETURN(true);
8627 
8628       if (best_ref[first_inner]->and_with_condition(cond))
8629         DBUG_RETURN(true);
8630     }
8631     /*
8632       Split the non-constant part of the join condition into parts that
8633       can be attached to the inner tables of the outer join.
8634     */
8635     for (plan_idx i= first_inner; i <= last_tab; ++i)
8636     {
8637       table_map prefix_tables= best_ref[i]->prefix_tables();
8638       table_map added_tables= best_ref[i]->added_tables();
8639 
8640       /*
8641         When handling the first inner table of an outer join, we may also
8642         reference all tables ahead of this table:
8643       */
8644       if (i == first_inner)
8645         added_tables= prefix_tables;
8646       /*
8647         We need RAND_TABLE_BIT on the last inner table, in case there is a
8648         non-deterministic function in the join condition.
8649         (RAND_TABLE_BIT is set for the last table of the join plan,
8650          but this is not sufficient for join conditions, which may have a
8651          last inner table that is ahead of the last table of the join plan).
8652       */
8653       if (i == last_tab)
8654       {
8655         prefix_tables|= RAND_TABLE_BIT;
8656         added_tables|= RAND_TABLE_BIT;
8657       }
8658       cond= make_cond_for_table(join_cond, prefix_tables, added_tables, false);
8659       if (cond == NULL)
8660         continue;
8661       /*
8662         If the table is part of an outer join that is embedded in the
8663         outer join currently being processed, wrap the condition in
8664         triggered conditions for match variables of such embedded outer joins.
8665       */
8666       if (!(cond= add_found_match_trig_cond(this, best_ref[i]->first_inner(),
8667                                             cond, first_inner)))
8668         DBUG_RETURN(true);
8669 
8670       // Add the guard turning the predicate off for the null-complemented row.
8671       cond= new Item_func_trig_cond(cond, NULL, this, first_inner,
8672                                     Item_func_trig_cond::IS_NOT_NULL_COMPL);
8673       if (!cond)
8674         DBUG_RETURN(true);
8675       if (cond->fix_fields(thd, NULL))
8676         DBUG_RETURN(true);
8677 
8678       // Add the generated condition to the existing table condition
8679       if (best_ref[i]->and_with_condition(cond))
8680         DBUG_RETURN(true);
8681     }
8682   }
8683 
8684   DBUG_RETURN(false);
8685 }
8686 
8687 
/*****************************************************************************
  Remove calculations with tables that aren't yet read. Also remove tests
  against fields that are read through a key where the table is not an
  outer join table.
  We can't remove tests that are made against columns which are stored
  in sorted order.
*****************************************************************************/
8695 
8696 static Item *
part_of_refkey(TABLE * table,TABLE_REF * ref,Field * field)8697 part_of_refkey(TABLE *table, TABLE_REF *ref, Field *field)
8698 {
8699   uint ref_parts= ref->key_parts;
8700   if (ref_parts)
8701   {
8702     if (ref->has_guarded_conds())
8703       return NULL;
8704 
8705     const KEY_PART_INFO *key_part= table->key_info[ref->key].key_part;
8706 
8707     for (uint part=0 ; part < ref_parts ; part++,key_part++)
8708       if (field->eq(key_part->field) &&
8709 	  !(key_part->key_part_flag & HA_PART_KEY_SEG))
8710 	return ref->items[part];
8711   }
8712   return NULL;
8713 }
8714 
8715 
8716 /**
8717   @return
8718     1 if right_item is used removable reference key on left_item
8719 
8720   @note see comments in make_cond_for_table_from_pred() about careful
8721   usage/modifications of test_if_ref().
8722 */
8723 
test_if_ref(Item * root_cond,Item_field * left_item,Item * right_item)8724 static bool test_if_ref(Item *root_cond,
8725                         Item_field *left_item,Item *right_item)
8726 {
8727   if (left_item->depended_from)
8728     return false; // don't even read join_tab of inner subquery!
8729   Field *field=left_item->field;
8730   JOIN_TAB *join_tab= field->table->reginfo.join_tab;
8731   if (join_tab)
8732     ASSERT_BEST_REF_IN_JOIN_ORDER(join_tab->join());
8733  // No need to change const test
8734   if (!field->table->const_table && join_tab &&
8735       (join_tab->first_inner() == NO_PLAN_IDX ||
8736        join_tab->join()->best_ref[join_tab->first_inner()]->join_cond() == root_cond) &&
8737       /* "ref_or_null" implements "x=y or x is null", not "x=y" */
8738       (join_tab->type() != JT_REF_OR_NULL))
8739   {
8740     Item *ref_item= part_of_refkey(field->table, &join_tab->ref(), field);
8741     if (ref_item && ref_item->eq(right_item,1))
8742     {
8743       right_item= right_item->real_item();
8744       if (right_item->type() == Item::FIELD_ITEM)
8745 	return (field->eq_def(((Item_field *) right_item)->field));
8746       /* remove equalities injected by IN->EXISTS transformation */
8747       else if (right_item->type() == Item::CACHE_ITEM)
8748         return ((Item_cache *)right_item)->eq_def (field);
8749       if (right_item->const_item() && !(right_item->is_null()))
8750       {
8751         /*
8752           We can remove all fields except:
8753           1. String data types:
8754            - For BINARY/VARBINARY fields with equality against a
8755              string: Ref access can return more rows than match the
8756              string. The reason seems to be that the string constant
8757              is not "padded" to the full length of the field when
8758              setting up ref access. @todo Change how ref access for
8759              BINARY/VARBINARY fields are done so that only qualifying
8760              rows are returned from the storage engine.
8761           2. Float data type: Comparison of float can differ
8762            - When we search "WHERE field=value" using an index,
8763              the "value" side is converted from double to float by
8764              Field_float::store(), then two floats are compared.
8765            - When we search "WHERE field=value" without indexes,
8766              the "field" side is converted from float to double by
8767              Field_float::val_real(), then two doubles are compared.
8768           Note about string data types: All currently existing
8769           collations have "PAD SPACE" style. If we introduce "NO PAD"
8770           collations this function must return false for such
8771           collations, because trailing space compression for indexes
8772           makes the table value and the index value not equal to each
8773           other in "NO PAD" collations. As index lookup strips
8774           trailing spaces, it can return false candidates. Further
8775           comparison of the actual table values is required.
8776         */
8777         if (!((field->type() == MYSQL_TYPE_STRING ||                       // 1
8778                field->type() == MYSQL_TYPE_VARCHAR) && field->binary()) &&
8779             !(field->type() == MYSQL_TYPE_FLOAT && field->decimals() > 0)) // 2
8780         {
8781           return !right_item->save_in_field_no_warnings(field, true);
8782         }
8783       }
8784     }
8785   }
8786   return 0;					// keep test
8787 }
8788 
8789 
8790 /*
8791   Remove the predicates pushed down into the subquery
8792 
8793   DESCRIPTION
8794     Given that this join will be executed using (unique|index)_subquery,
8795     without "checking NULL", remove the predicates that were pushed down
8796     into the subquery.
8797 
8798     If the subquery compares scalar values, we can remove the condition that
8799     was wrapped into trig_cond (it will be checked when needed by the subquery
8800     engine)
8801 
8802     If the subquery compares row values, we need to keep the wrapped
8803     equalities in the WHERE clause: when the left (outer) tuple has both NULL
8804     and non-NULL values, we'll do a full table scan and will rely on the
8805     equalities corresponding to non-NULL parts of left tuple to filter out
8806     non-matching records.
8807 
8808     If '*where' is a triggered condition, or contains 'OR x IS NULL', or
8809     contains a condition coming from the original subquery's WHERE clause, or
8810     if there are more than one outer expressions, then WHERE is not of the
8811     simple form:
8812       outer_expr = inner_expr
8813     and thus this function does nothing.
8814 
8815     If the index is on prefix (=> test_if_ref() is false), then the equality
8816     is needed as post-filter, so this function does nothing.
8817 
8818     TODO: We can remove the equalities that will be guaranteed to be true by the
8819     fact that subquery engine will be using index lookup. This must be done only
8820     for cases where there are no conversion errors of significance, e.g. 257
8821     that is searched in a byte. But this requires homogenization of the return
8822     codes of all Field*::store() methods.
8823 */
remove_subq_pushed_predicates()8824 void JOIN::remove_subq_pushed_predicates()
8825 {
8826   if (where_cond->type() != Item::FUNC_ITEM)
8827     return;
8828   Item_func *const func= static_cast<Item_func *>(where_cond);
8829   if (func->functype() == Item_func::EQ_FUNC &&
8830       func->arguments()[0]->type() == Item::REF_ITEM &&
8831       func->arguments()[1]->type() == Item::FIELD_ITEM &&
8832       test_if_ref(func,
8833                   static_cast<Item_field *>(func->arguments()[1]),
8834                   func->arguments()[0]))
8835   {
8836     where_cond= NULL;
8837     return;
8838   }
8839 }
8840 
8841 
8842 /**
8843   @brief
8844   Add keys to derived tables'/views' result tables in a list
8845 
8846   @param select_lex generate derived keys for select_lex's derived tables
8847 
8848   @details
8849   This function generates keys for all derived tables/views of the select_lex
8850   to which this join corresponds to with help of the TABLE_LIST:generate_keys
8851   function.
8852 
8853   @return FALSE all keys were successfully added.
8854   @return TRUE OOM error
8855 */
8856 
generate_derived_keys()8857 bool JOIN::generate_derived_keys()
8858 {
8859   assert(select_lex->materialized_derived_table_count);
8860 
8861   for (TABLE_LIST *table= select_lex->leaf_tables;
8862        table;
8863        table= table->next_leaf)
8864   {
8865     table->derived_keys_ready= TRUE;
8866     /* Process tables that aren't materialized yet. */
8867     if (table->uses_materialization() && !table->table->is_created() &&
8868         table->generate_keys())
8869       return TRUE;
8870   }
8871   return FALSE;
8872 }
8873 
8874 
8875 /**
8876   @brief
8877   Drop unused keys for each materialized derived table/view
8878 
8879   @details
8880   For each materialized derived table/view, call TABLE::use_index to save one
8881   index chosen by the optimizer and ignore others. If no key is chosen, then all
8882   keys will be ignored.
8883 */
8884 
drop_unused_derived_keys()8885 void JOIN::drop_unused_derived_keys()
8886 {
8887   assert(select_lex->materialized_derived_table_count);
8888   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
8889 
8890   for (uint i= 0 ; i < tables ; i++)
8891   {
8892     JOIN_TAB *tab= best_ref[i];
8893     TABLE *table= tab->table();
8894     /*
8895      Save chosen key description if:
8896      1) it's a materialized derived table
8897      2) it's not yet instantiated
8898      3) some keys are defined for it
8899     */
8900     if (table &&
8901         tab->table_ref->uses_materialization() &&               // (1)
8902         !table->is_created() &&                                 // (2)
8903         table->max_keys > 0)                                    // (3)
8904     {
8905       Key_use *keyuse= tab->position()->key;
8906 
8907       table->use_index(keyuse ? keyuse->key : -1);
8908 
8909       const bool key_is_const= keyuse && tab->const_keys.is_set(keyuse->key);
8910       tab->const_keys.clear_all();
8911       tab->keys().clear_all();
8912 
8913       if (!keyuse)
8914         continue;
8915 
8916       /*
8917         Update the selected "keyuse" to point to key number 0.
8918         Notice that unused keyuse entries still point to the deleted
8919         candidate keys. tab->keys (and tab->const_keys if the chosen key
8920         is constant) should reference key object no. 0 as well.
8921       */
8922       tab->keys().set_bit(0);
8923       if (key_is_const)
8924         tab->const_keys.set_bit(0);
8925 
8926       const uint oldkey= keyuse->key;
8927       for (; keyuse->table_ref == tab->table_ref && keyuse->key == oldkey;
8928            keyuse++)
8929         keyuse->key= 0;
8930     }
8931   }
8932 }
8933 
8934 
8935 /**
8936   Cache constant expressions in WHERE, HAVING, ON conditions.
8937 
8938   @return False if success, True if error
8939 
8940   @note This function is run after conditions have been pushed down to
8941         individual tables, so transformation is applied to JOIN_TAB::condition
8942         and not to the WHERE condition.
8943 */
8944 
cache_const_exprs()8945 bool JOIN::cache_const_exprs()
8946 {
8947   /* No need in cache if all tables are constant. */
8948   assert(!plan_is_const());
8949   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
8950 
8951   for (uint i= const_tables; i < tables; i++)
8952   {
8953     Item *condition= best_ref[i]->condition();
8954     if (condition == NULL)
8955       continue;
8956     Item *cache_item= NULL;
8957     Item **analyzer_arg= &cache_item;
8958     condition=
8959       condition->compile(&Item::cache_const_expr_analyzer,
8960                          (uchar **)&analyzer_arg,
8961                          &Item::cache_const_expr_transformer,
8962                          (uchar *)&cache_item);
8963     if (condition == NULL)
8964       return true;
8965     best_ref[i]->set_condition(condition);
8966   }
8967   if (having_cond)
8968   {
8969     Item *cache_item= NULL;
8970     Item **analyzer_arg= &cache_item;
8971     having_cond= having_cond->compile(&Item::cache_const_expr_analyzer,
8972                                       (uchar **)&analyzer_arg,
8973                                       &Item::cache_const_expr_transformer,
8974                                       (uchar *)&cache_item);
8975     if (having_cond == NULL)
8976       return true;
8977   }
8978   return false;
8979 }
8980 
8981 
8982 /**
8983   Extract a condition that can be checked after reading given table
8984 
8985   @param cond       Condition to analyze
8986   @param tables     Tables for which "current field values" are available
8987   @param used_table Table(s) that we are extracting the condition for (may
8988                     also include PSEUDO_TABLE_BITS, and may be zero)
8989   @param exclude_expensive_cond  Do not push expensive conditions
8990 
8991   @retval <>NULL Generated condition
8992   @retval = NULL Already checked, OR error
8993 
8994   @details
8995     Extract the condition that can be checked after reading the table(s)
8996     specified in @c used_table, given that current-field values for tables
8997     specified in @c tables bitmap are available.
8998     If @c used_table is 0, extract conditions for all tables in @c tables.
8999 
9000     This function can be used to extract conditions relevant for a table
9001     in a join order. Together with its caller, it will ensure that all
9002     conditions are attached to the first table in the join order where all
9003     necessary fields are available, and it will also ensure that a given
9004     condition is attached to only one table.
9005     To accomplish this, first initialize @c tables to the empty
9006     set. Then, loop over all tables in the join order, set @c used_table to
9007     the bit representing the current table, accumulate @c used_table into the
9008     @c tables set, and call this function. To ensure correct handling of
9009     const expressions and outer references, add the const table map and
9010     OUTER_REF_TABLE_BIT to @c used_table for the first table. To ensure
9011     that random expressions are evaluated for the final table, add
9012     RAND_TABLE_BIT to @c used_table for the final table.
9013 
9014     The function assumes that constant, inexpensive parts of the condition
9015     have already been checked. Constant, expensive parts will be attached
9016     to the first table in the join order, provided that the above call
9017     sequence is followed.
9018 
9019     The call order will ensure that conditions covering tables in @c tables
9020     minus those in @c used_table, have already been checked.
9021 
9022     The function takes into account that some parts of the condition are
9023     guaranteed to be true by employed 'ref' access methods (the code that
9024     does this is located at the end, search down for "EQ_FUNC").
9025 
9026   @note
9027     make_cond_for_info_schema() uses an algorithm similar to
9028     make_cond_for_table().
9029 */
9030 
9031 Item *
make_cond_for_table(Item * cond,table_map tables,table_map used_table,bool exclude_expensive_cond)9032 make_cond_for_table(Item *cond, table_map tables, table_map used_table,
9033                     bool exclude_expensive_cond)
9034 {
9035   return make_cond_for_table_from_pred(cond, cond, tables, used_table,
9036                                        exclude_expensive_cond);
9037 }
9038 
9039 static Item *
make_cond_for_table_from_pred(Item * root_cond,Item * cond,table_map tables,table_map used_table,bool exclude_expensive_cond)9040 make_cond_for_table_from_pred(Item *root_cond, Item *cond,
9041                               table_map tables, table_map used_table,
9042                               bool exclude_expensive_cond)
9043 {
9044   /*
9045     Ignore this condition if
9046      1. We are extracting conditions for a specific table, and
9047      2. that table is not referenced by the condition, but not if
9048      3. this is a constant condition not checked at optimization time and
9049         this is the first table we are extracting conditions for.
9050        (Assuming that used_table == tables for the first table.)
9051   */
9052   if (used_table &&                                                 // 1
9053       !(cond->used_tables() & used_table) &&                        // 2
9054       !(cond->is_expensive() && used_table == tables))              // 3
9055     return NULL;
9056 
9057   if (cond->type() == Item::COND_ITEM)
9058   {
9059     if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
9060     {
9061       /* Create new top level AND item */
9062       Item_cond_and *new_cond= new Item_cond_and;
9063       if (!new_cond)
9064         return NULL;
9065       List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
9066       Item *item;
9067       while ((item= li++))
9068       {
9069         Item *fix= make_cond_for_table_from_pred(root_cond, item,
9070                                                  tables, used_table,
9071                                                  exclude_expensive_cond);
9072         if (fix)
9073           new_cond->argument_list()->push_back(fix);
9074       }
9075       switch (new_cond->argument_list()->elements) {
9076       case 0:
9077         return NULL;                          // Always true
9078       case 1:
9079         return new_cond->argument_list()->head();
9080       default:
9081         if (new_cond->fix_fields(current_thd, NULL))
9082           return NULL;
9083         return new_cond;
9084       }
9085     }
9086     else
9087     {                                         // Or list
9088       Item_cond_or *new_cond= new Item_cond_or;
9089       if (!new_cond)
9090         return NULL;
9091       List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
9092       Item *item;
9093       while ((item= li++))
9094       {
9095         Item *fix= make_cond_for_table_from_pred(root_cond, item,
9096                                                  tables, 0L,
9097                                                  exclude_expensive_cond);
9098 	if (!fix)
9099           return NULL;                        // Always true
9100 	new_cond->argument_list()->push_back(fix);
9101       }
9102       if (new_cond->fix_fields(current_thd, NULL))
9103         return NULL;
9104       return new_cond;
9105     }
9106   }
9107 
9108   /*
9109     Omit this condition if
9110      1. It has been marked as omittable before, or
9111      2. Some tables referred by the condition are not available, or
9112      3. We are extracting conditions for all tables, the condition is
9113         considered 'expensive', and we want to delay evaluation of such
9114         conditions to the execution phase.
9115   */
9116   if (cond->marker == 3 ||                                             // 1
9117       (cond->used_tables() & ~tables) ||                               // 2
9118       (!used_table && exclude_expensive_cond && cond->is_expensive())) // 3
9119     return NULL;
9120 
9121   /*
9122     Extract this condition if
9123      1. It has already been marked as applicable, or
9124      2. It is not a <comparison predicate> (=, <, >, <=, >=, <=>)
9125   */
9126   if (cond->marker == 2 ||                                             // 1
9127       cond->eq_cmp_result() == Item::COND_OK)                          // 2
9128     return cond;
9129 
9130   /*
9131     Remove equalities that are guaranteed to be true by use of 'ref' access
9132     method.
9133     Note that ref access implements "table1.field1 <=> table2.indexed_field2",
9134     i.e. if it passed a NULL field1, it will return NULL indexed_field2 if
9135     there are.
9136     Thus the equality "table1.field1 = table2.indexed_field2",
9137     is equivalent to "ref access AND table1.field1 IS NOT NULL"
9138     i.e. "ref access and proper setting/testing of ref->null_rejecting".
9139     Thus, we must be careful, that when we remove equalities below we also
9140     set ref->null_rejecting, and test it at execution; otherwise wrong NULL
9141     matches appear.
9142     So:
9143     - for the optimization phase, the code which is below, and the code in
9144     test_if_ref(), and in add_key_field(), must be kept in sync: if the
9145     applicability conditions in one place are relaxed, they should also be
9146     relaxed elsewhere.
9147     - for the execution phase, all possible execution methods must test
9148     ref->null_rejecting.
9149   */
9150   if (cond->type() == Item::FUNC_ITEM &&
9151       ((Item_func*) cond)->functype() == Item_func::EQ_FUNC)
9152   {
9153     Item *left_item= ((Item_func*) cond)->arguments()[0]->real_item();
9154     Item *right_item= ((Item_func*) cond)->arguments()[1]->real_item();
9155     if ((left_item->type() == Item::FIELD_ITEM &&
9156          test_if_ref(root_cond, (Item_field*) left_item, right_item)) ||
9157         (right_item->type() == Item::FIELD_ITEM &&
9158          test_if_ref(root_cond, (Item_field*) right_item, left_item)))
9159     {
9160       cond->marker= 3;                   // Condition can be omitted
9161       return NULL;
9162     }
9163   }
9164   cond->marker= 2;                      // Mark condition as applicable
9165   return cond;
9166 }
9167 
9168 
9169 /**
9170   Separates the predicates in a join condition and pushes them to the
9171   join step where all involved tables are available in the join prefix.
9172   ON clauses from JOIN expressions are also pushed to the most appropriate step.
9173 
9174   @param join Join object where predicates are pushed.
9175 
9176   @param cond Pointer to condition which may contain an arbitrary number of
9177               predicates, combined using AND, OR and XOR items.
9178               If NULL, equivalent to a predicate that returns TRUE for all
9179               row combinations.
9180 
9181 
9182   @retval true  Found impossible WHERE clause, or out-of-memory
9183   @retval false Other
9184 */
9185 
make_join_select(JOIN * join,Item * cond)9186 static bool make_join_select(JOIN *join, Item *cond)
9187 {
9188   THD *thd= join->thd;
9189   Opt_trace_context * const trace= &thd->opt_trace;
9190   DBUG_ENTER("make_join_select");
9191   ASSERT_BEST_REF_IN_JOIN_ORDER(join);
9192 
9193   // Add IS NOT NULL conditions to table conditions:
9194   add_not_null_conds(join);
9195 
9196   /*
9197     Extract constant conditions that are part of the WHERE clause.
9198     Constant parts of join conditions from outer joins are attached to
9199     the appropriate table condition in JOIN::attach_join_conditions().
9200   */
9201   if (cond)                /* Because of QUICK_GROUP_MIN_MAX_SELECT */
9202   {                        /* there may be a select without a cond. */
9203     if (join->primary_tables > 1)
9204       cond->update_used_tables();    // Table number may have changed
9205     if (join->plan_is_const() &&
9206         join->select_lex->master_unit() ==
9207         thd->lex->unit)             // The outer-most query block
9208       join->const_table_map|= RAND_TABLE_BIT;
9209   }
9210   /*
9211     Extract conditions that depend on constant tables.
9212     The const part of the query's WHERE clause can be checked immediately
9213     and if it is not satisfied then the join has empty result
9214   */
9215   Item *const_cond= NULL;
9216   if (cond)
9217     const_cond= make_cond_for_table(cond, join->const_table_map,
9218                                     (table_map) 0, true);
9219 
9220   // Add conditions added by add_not_null_conds()
9221   for (uint i= 0; i < join->const_tables; i++)
9222   {
9223     if (and_conditions(&const_cond, join->best_ref[i]->condition()))
9224       DBUG_RETURN(true);
9225   }
9226   DBUG_EXECUTE("where", print_where(const_cond, "constants", QT_ORDINARY););
9227   if (const_cond != NULL)
9228   {
9229     const bool const_cond_result= const_cond->val_int() != 0;
9230     if (thd->is_error())
9231       DBUG_RETURN(true);
9232 
9233     Opt_trace_object trace_const_cond(trace);
9234     trace_const_cond.add("condition_on_constant_tables", const_cond)
9235                     .add("condition_value", const_cond_result);
9236     if (!const_cond_result)
9237     {
9238       DBUG_PRINT("info",("Found impossible WHERE condition"));
9239       DBUG_RETURN(true);
9240     }
9241   }
9242 
9243   /*
9244     Extract remaining conditions from WHERE clause and join conditions,
9245     and attach them to the most appropriate table condition. This means that
9246     a condition will be evaluated as soon as all fields it depends on are
9247     available. For outer join conditions, the additional criterion is that
9248     we must have determined whether outer-joined rows are available, or
9249     have been NULL-extended, see JOIN::attach_join_conditions() for details.
9250   */
9251   {
9252     Opt_trace_object trace_wrapper(trace);
9253     Opt_trace_object
9254       trace_conditions(trace, "attaching_conditions_to_tables");
9255     trace_conditions.add("original_condition", cond);
9256     Opt_trace_array
9257       trace_attached_comp(trace, "attached_conditions_computation");
9258 
9259     for (uint i=join->const_tables ; i < join->tables ; i++)
9260     {
9261       JOIN_TAB *const tab= join->best_ref[i];
9262 
9263       if (!tab->position())
9264         continue;
9265       /*
9266         first_inner is the X in queries like:
9267         SELECT * FROM t1 LEFT OUTER JOIN (t2 JOIN t3) ON X
9268       */
9269       const plan_idx first_inner= tab->first_inner();
9270       const table_map used_tables= tab->prefix_tables();
9271       const table_map current_map= tab->added_tables();
9272       Item *tmp= NULL;
9273 
9274       if (cond)
9275         tmp= make_cond_for_table(cond,used_tables,current_map, 0);
9276       /* Add conditions added by add_not_null_conds(). */
9277       if (tab->condition() && and_conditions(&tmp, tab->condition()))
9278         DBUG_RETURN(true);
9279 
9280 
9281       if (cond && !tmp && tab->quick())
9282       {						// Outer join
9283         assert(tab->type() == JT_RANGE || tab->type() == JT_INDEX_MERGE);
9284         /*
9285           Hack to handle the case where we only refer to a table
9286           in the ON part of an OUTER JOIN. In this case we want the code
9287           below to check if we should use 'quick' instead.
9288         */
9289         DBUG_PRINT("info", ("Item_int"));
9290         tmp= new Item_int((longlong) 1,1);	// Always true
9291       }
9292       if (tmp || !cond || tab->type() == JT_REF || tab->type() == JT_REF_OR_NULL ||
9293           tab->type() == JT_EQ_REF || first_inner != NO_PLAN_IDX)
9294       {
9295         DBUG_EXECUTE("where",print_where(tmp,tab->table()->alias, QT_ORDINARY););
9296         /*
9297           If tab is an inner table of an outer join operation,
9298           add a match guard to the pushed down predicate.
9299           The guard will turn the predicate on only after
9300           the first match for outer tables is encountered.
9301 	*/
9302         if (cond && tmp)
9303         {
9304           /*
9305             Because of QUICK_GROUP_MIN_MAX_SELECT there may be a select without
9306             a cond, so neutralize the hack above.
9307           */
9308           if (!(tmp= add_found_match_trig_cond(join, first_inner, tmp, NO_PLAN_IDX)))
9309             DBUG_RETURN(true);
9310           tab->set_condition(tmp);
9311           /* Push condition to storage engine if this is enabled
9312              and the condition is not guarded */
9313 	  if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN) &&
9314               first_inner == NO_PLAN_IDX)
9315           {
9316             Item *push_cond=
9317               make_cond_for_table(tmp, tab->table_ref->map(),
9318                                   tab->table_ref->map(), 0);
9319             if (push_cond)
9320             {
9321               /* Push condition to handler */
9322               if (!tab->table()->file->cond_push(push_cond))
9323                 tab->table()->file->pushed_cond= push_cond;
9324             }
9325           }
9326         }
9327         else
9328         {
9329           tab->set_condition(NULL);
9330         }
9331 
9332         DBUG_EXECUTE("where",print_where(tmp,tab->table()->alias, QT_ORDINARY););
9333 
9334 	if (tab->quick())
9335 	{
9336           if (tab->needed_reg.is_clear_all() && tab->type() != JT_CONST)
9337           {
9338             /*
9339               We keep (for now) the QUICK AM calculated in
9340               get_quick_record_count().
9341             */
9342             assert(tab->quick()->is_valid());
9343 	  }
9344 	  else
9345           {
9346             delete tab->quick();
9347 	    tab->set_quick(NULL);
9348           }
9349 	}
9350 
9351         if ((tab->type() == JT_ALL || tab->type() == JT_RANGE ||
9352             tab->type() == JT_INDEX_MERGE || tab->type() == JT_INDEX_SCAN) &&
9353             tab->use_quick != QS_RANGE)
9354 	{
9355           /*
9356             We plan to scan (table/index/range scan).
9357             Check again if we should use an index. We can use an index if:
9358 
9359             1a) There is a condition that range optimizer can work on, and
9360             1b) There are non-constant conditions on one or more keys, and
9361             1c) Some of the non-constant fields may have been read
9362                 already. This may be the case if this is not the first
9363                 table in the join OR this is a subselect with
9364                 non-constant conditions referring to an outer table
9365                 (dependent subquery)
9366                 or,
9367             2a) There are conditions only relying on constants
9368             2b) This is the first non-constant table
9369             2c) There is a limit of rows to read that is lower than
9370                 the fanout for this table, predicate filters included
9371                 (i.e., the estimated number of rows that will be
9372                 produced for this table per row combination of
9373                 previous tables)
9374             2d) The query is NOT run with FOUND_ROWS() (because in that
9375                 case we have to scan through all rows to count them anyway)
9376           */
9377           enum { DONT_RECHECK, NOT_FIRST_TABLE, LOW_LIMIT }
9378           recheck_reason= DONT_RECHECK;
9379 
9380           assert(tab->const_keys.is_subset(tab->keys()));
9381 
9382           const join_type orig_join_type= tab->type();
9383           const QUICK_SELECT_I *const orig_quick= tab->quick();
9384 
9385           if (cond &&                                                // 1a
9386               (tab->keys() != tab->const_keys) &&                      // 1b
9387               (i > 0 ||                                              // 1c
9388                (join->select_lex->master_unit()->item &&
9389                 cond->used_tables() & OUTER_REF_TABLE_BIT)))
9390             recheck_reason= NOT_FIRST_TABLE;
9391           else if (!tab->const_keys.is_clear_all() &&                // 2a
9392                    i == join->const_tables &&                        // 2b
9393                    (join->unit->select_limit_cnt <
9394                     (tab->position()->rows_fetched *
9395                      tab->position()->filter_effect)) &&               // 2c
9396                    !join->calc_found_rows)                             // 2d
9397             recheck_reason= LOW_LIMIT;
9398 
9399           if (tab->position()->sj_strategy == SJ_OPT_LOOSE_SCAN)
9400           {
9401             /*
9402               Semijoin loose scan has settled for a certain index-based access
9403               method with suitable characteristics, don't substitute it.
9404             */
9405             recheck_reason= DONT_RECHECK;
9406           }
9407 
9408           if (recheck_reason != DONT_RECHECK)
9409           {
9410             Opt_trace_object trace_one_table(trace);
9411             trace_one_table.add_utf8_table(tab->table_ref);
9412             Opt_trace_object trace_table(trace, "rechecking_index_usage");
9413             if (recheck_reason == NOT_FIRST_TABLE)
9414               trace_table.add_alnum("recheck_reason", "not_first_table");
9415             else
9416               trace_table.add_alnum("recheck_reason", "low_limit").
9417                 add("limit", join->unit->select_limit_cnt).
9418                 add("row_estimate",
9419                     tab->position()->rows_fetched *
9420                     tab->position()->filter_effect);
9421 
9422             /* Join with outer join condition */
9423             Item *orig_cond= tab->condition();
9424             tab->and_with_condition(tab->join_cond());
9425 
9426             /*
9427               We can't call sel->cond->fix_fields,
9428               as it will break tab->join_cond() if it's AND condition
9429               (fix_fields currently removes extra AND/OR levels).
9430               Yet attributes of the just built condition are not needed.
9431               Thus we call sel->cond->quick_fix_field for safety.
9432             */
9433             if (tab->condition() && !tab->condition()->fixed)
9434               tab->condition()->quick_fix_field();
9435 
9436             key_map usable_keys= tab->keys();
9437             ORDER::enum_order interesting_order= ORDER::ORDER_NOT_RELEVANT;
9438 
9439             if (recheck_reason == LOW_LIMIT)
9440             {
9441               int read_direction= 0;
9442 
9443               /*
9444                 If the current plan is to use range, then check if the
9445                 already selected index provides the order dictated by the
9446                 ORDER BY clause.
9447               */
9448               if (tab->quick() && tab->quick()->index != MAX_KEY)
9449               {
9450                 const uint ref_key= tab->quick()->index;
9451 
9452                 read_direction= test_if_order_by_key(join->order,
9453                                                      tab->table(), ref_key);
9454                 /*
9455                   If the index provides order there is no need to recheck
9456                   index usage; we already know from the former call to
9457                   test_quick_select() that a range scan on the chosen
9458                   index is cheapest. Note that previous calls to
9459                   test_quick_select() did not take order direction
9460                   (ASC/DESC) into account, so in case of DESC ordering
9461                   we still need to recheck.
9462                 */
9463                 if ((read_direction == 1) ||
9464                     (read_direction == -1 && tab->quick()->reverse_sorted()))
9465                 {
9466                   recheck_reason= DONT_RECHECK;
9467                 }
9468               }
9469               /*
9470                 We do a cost based search for an ordering index here. Do this
9471                 only if prefer_ordering_index switch is on or an index is
9472                 forced for order by
9473               */
9474               if (recheck_reason != DONT_RECHECK &&
9475                   (tab->table()->force_index_order ||
9476                    thd->optimizer_switch_flag(
9477                        OPTIMIZER_SWITCH_PREFER_ORDERING_INDEX)))
9478               {
9479                 int best_key= -1;
9480                 ha_rows select_limit= join->unit->select_limit_cnt;
9481 
9482                 /* Use index specified in FORCE INDEX FOR ORDER BY, if any. */
9483                 if (tab->table()->force_index)
9484                   usable_keys.intersect(tab->table()->keys_in_use_for_order_by);
9485 
9486                 /* Do a cost based search on the indexes that give sort order */
9487                 test_if_cheaper_ordering(tab, join->order, tab->table(),
9488                                          usable_keys, -1, select_limit,
9489                                          &best_key, &read_direction,
9490                                          &select_limit);
9491                 if (best_key < 0)
9492                   recheck_reason= DONT_RECHECK; // No usable keys
9493                 else
9494                 {
9495                   // Only usable_key is the best_key chosen
9496                   usable_keys.clear_all();
9497                   usable_keys.set_bit(best_key);
9498                   interesting_order= (read_direction == -1 ? ORDER::ORDER_DESC :
9499                                       ORDER::ORDER_ASC);
9500                 }
9501               }
9502             }
9503 
9504             bool search_if_impossible= recheck_reason != DONT_RECHECK;
9505             if (search_if_impossible)
9506             {
9507               if (tab->quick())
9508               {
9509                 delete tab->quick();
9510                 tab->set_type(JT_ALL);
9511               }
9512               QUICK_SELECT_I *qck;
9513               search_if_impossible=
9514                 test_quick_select(thd, usable_keys,
9515                                   used_tables & ~tab->table_ref->map(),
9516                                   join->calc_found_rows ?
9517                                    HA_POS_ERROR :
9518                                    join->unit->select_limit_cnt,
9519                                   false,   // don't force quick range
9520                                   interesting_order, tab,
9521                                   tab->condition(),
9522                                   &tab->needed_reg, &qck,
9523                                   tab->table()->force_index) < 0;
9524               tab->set_quick(qck);
9525             }
9526             tab->set_condition(orig_cond);
9527             if (search_if_impossible)
9528             {
9529               /*
9530                 Before reporting "Impossible WHERE" for the whole query
9531                 we have to check isn't it only "impossible ON" instead
9532               */
9533               if (!tab->join_cond())
9534                 DBUG_RETURN(1);  // No ON, so it's really "impossible WHERE"
9535               Opt_trace_object trace_without_on(trace, "without_ON_clause");
9536               if (tab->quick())
9537               {
9538                 delete tab->quick();
9539                 tab->set_type(JT_ALL);
9540               }
9541               QUICK_SELECT_I *qck;
9542               const bool impossible_where=
9543                 test_quick_select(thd, tab->keys(),
9544                                   used_tables & ~tab->table_ref->map(),
9545                                   join->calc_found_rows ?
9546                                    HA_POS_ERROR :
9547                                    join->unit->select_limit_cnt,
9548                                   false,   //don't force quick range
9549                                   ORDER::ORDER_NOT_RELEVANT, tab,
9550                                   tab->condition(), &tab->needed_reg,
9551                                   &qck, tab->table()->force_index) < 0;
9552               tab->set_quick(qck);
9553               if (impossible_where)
9554                 DBUG_RETURN(1);			// Impossible WHERE
9555             }
9556 
9557             /*
9558               Access method changed. This is after deciding join order
9559               and access method for all other tables so the info
9560               updated below will not have any effect on the execution
9561               plan.
9562             */
9563             if (tab->quick())
9564               tab->set_type(calc_join_type(tab->quick()->get_type()));
9565 
9566           } // end of "if (recheck_reason != DONT_RECHECK)"
9567 
9568           if (!tab->table()->quick_keys.is_subset(tab->checked_keys) ||
9569               !tab->needed_reg.is_subset(tab->checked_keys))
9570           {
9571             tab->keys().merge(tab->table()->quick_keys);
9572             tab->keys().merge(tab->needed_reg);
9573 
9574             /*
9575               The logic below for assigning tab->use_quick is strange.
9576               It bases the decision of which access method to use
9577               (dynamic range, range, scan) based on seemingly
9578               unrelated information like the presense of another index
9579               with too bad selectivity to be used.
9580 
9581               Consider the following scenario:
9582 
9583               The join optimizer has decided to use join order
9584               (t1,t2), and 'tab' is currently t2. Further, assume that
9585               there is a join condition between t1 and t2 using some
9586               range operator (e.g. "t1.x < t2.y").
9587 
9588               It has been decided that a table scan is best for t2.
9589               make_join_select() then reran the range optimizer a few
9590               lines up because there is an index 't2.good_idx'
9591               covering the t2.y column. If 'good_idx' is the only
9592               index in t2, the decision below will be to use dynamic
9593               range. However, if t2 also has another index 't2.other'
9594               which the range access method can be used on but
9595               selectivity is bad (#rows estimate is high), then table
9596               scan is chosen instead.
9597 
9598               Thus, the choice of DYNAMIC RANGE vs SCAN depends on the
9599               presense of an index that has so bad selectivity that it
9600               will not be used anyway.
9601             */
9602             if (!tab->needed_reg.is_clear_all() &&
9603                 (tab->table()->quick_keys.is_clear_all() ||
9604                  (tab->quick() &&
9605                   (tab->quick()->records >= 100L))))
9606             {
9607               tab->use_quick= QS_DYNAMIC_RANGE;
9608               tab->set_type(JT_ALL);
9609             }
9610             else
9611               tab->use_quick= QS_RANGE;
9612           }
9613 
9614           if (tab->type() != orig_join_type ||
9615               tab->quick() != orig_quick)       // Access method changed
9616             tab->position()->filter_effect= COND_FILTER_STALE;
9617 
9618 	}
9619       }
9620 
9621       if (join->attach_join_conditions(i))
9622         DBUG_RETURN(true);
9623     }
9624     trace_attached_comp.end();
9625 
9626     /*
9627       In outer joins the loop above, in iteration for table #i, may push
9628       conditions to a table before #i. Thus, the processing below has to be in
9629       a separate loop:
9630     */
9631     Opt_trace_array trace_attached_summary(trace,
9632                                            "attached_conditions_summary");
9633     for (uint i= join->const_tables ; i < join->tables ; i++)
9634     {
9635       JOIN_TAB * const tab= join->best_ref[i];
9636       if (!tab->table())
9637         continue;
9638       Item * const cond= tab->condition();
9639       Opt_trace_object trace_one_table(trace);
9640       trace_one_table.add_utf8_table(tab->table_ref).
9641         add("attached", cond);
9642       if (cond &&
9643           cond->has_subquery() /* traverse only if needed */ )
9644       {
9645         /*
9646           Why we pass walk_subquery=false: imagine
9647           WHERE t1.col IN (SELECT * FROM t2
9648                              WHERE t2.col IN (SELECT * FROM t3)
9649           and tab==t1. The grandchild subquery (SELECT * FROM t3) should not
9650           be marked as "in condition of t1" but as "in condition of t2", for
9651           correct calculation of the number of its executions.
9652         */
9653         std::pair<SELECT_LEX *, int> pair_object(join->select_lex, i);
9654         cond->walk(&Item::inform_item_in_cond_of_tab,
9655                    Item::WALK_POSTFIX,
9656                    pointer_cast<uchar * const>(&pair_object));
9657       }
9658 
9659     }
9660   }
9661   DBUG_RETURN(0);
9662 }
9663 
9664 
9665 /**
9666   Remove the following expressions from ORDER BY and GROUP BY:
9667   Constant expressions @n
9668   Expression that only uses tables that are of type EQ_REF and the reference
9669   is in the ORDER list or if all refereed tables are of the above type.
9670 
9671   In the following, the X field can be removed:
9672   @code
9673   SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t1.a,t2.X
9674   SELECT * FROM t1,t2,t3 WHERE t1.a=t2.a AND t2.b=t3.b ORDER BY t1.a,t3.X
9675   @endcode
9676 
9677   These can't be optimized:
9678   @code
9679   SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t2.X,t1.a
9680   SELECT * FROM t1,t2 WHERE t1.a=t2.a AND t1.b=t2.b ORDER BY t1.a,t2.c
9681   SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t2.b,t1.a
9682   @endcode
9683 
9684   @param  JOIN         join object
9685   @param  start_order  clause being analyzed (ORDER BY, GROUP BY...)
9686   @param  tab          table
9687   @param  cached_eq_ref_tables  bitmap: bit Z is set if the table of map Z
9688   was already the subject of an eq_ref_table() call for the same clause; then
9689   the return value of this previous call can be found at bit Z of
9690   'eq_ref_tables'
9691   @param  eq_ref_tables see above.
9692 */
9693 
9694 static bool
eq_ref_table(JOIN * join,ORDER * start_order,JOIN_TAB * tab,table_map * cached_eq_ref_tables,table_map * eq_ref_tables)9695 eq_ref_table(JOIN *join, ORDER *start_order, JOIN_TAB *tab,
9696              table_map *cached_eq_ref_tables, table_map *eq_ref_tables)
9697 {
9698   /* We can skip const tables only if not an outer table */
9699   if (tab->type() == JT_CONST && tab->first_inner() == NO_PLAN_IDX)
9700     return true;
9701   if (tab->type() != JT_EQ_REF || tab->table()->is_nullable())
9702     return false;
9703 
9704   const table_map map= tab->table_ref->map();
9705   uint found= 0;
9706 
9707   for (Item **ref_item= tab->ref().items, **end= ref_item + tab->ref().key_parts ;
9708        ref_item != end ; ref_item++)
9709   {
9710     if (! (*ref_item)->const_item())
9711     {						// Not a const ref
9712       ORDER *order;
9713       for (order=start_order ; order ; order=order->next)
9714       {
9715 	if ((*ref_item)->eq(order->item[0],0))
9716 	  break;
9717       }
9718       if (order)
9719       {
9720         if (!(order->used & map))
9721         {
9722           found++;
9723           order->used|= map;
9724         }
9725 	continue;				// Used in ORDER BY
9726       }
9727       if (!only_eq_ref_tables(join, start_order, (*ref_item)->used_tables(),
9728                               cached_eq_ref_tables, eq_ref_tables))
9729         return false;
9730     }
9731   }
9732   /* Check that there was no reference to table before sort order */
9733   for (; found && start_order ; start_order=start_order->next)
9734   {
9735     if (start_order->used & map)
9736     {
9737       found--;
9738       continue;
9739     }
9740     if (start_order->depend_map & map)
9741       return false;
9742   }
9743   return true;
9744 }
9745 
9746 
9747 /// @see eq_ref_table()
9748 static bool
only_eq_ref_tables(JOIN * join,ORDER * order,table_map tables,table_map * cached_eq_ref_tables,table_map * eq_ref_tables)9749 only_eq_ref_tables(JOIN *join, ORDER *order, table_map tables,
9750                    table_map *cached_eq_ref_tables, table_map *eq_ref_tables)
9751 {
9752   tables&= ~PSEUDO_TABLE_BITS;
9753   for (JOIN_TAB **tab=join->map2table ; tables ; tab++, tables>>=1)
9754   {
9755     if (tables & 1)
9756     {
9757       const table_map map= (*tab)->table_ref->map();
9758       bool is_eq_ref;
9759       if (*cached_eq_ref_tables & map) // then there exists a cached bit
9760         is_eq_ref= *eq_ref_tables & map;
9761       else
9762       {
9763         is_eq_ref= eq_ref_table(join, order, *tab,
9764                                 cached_eq_ref_tables, eq_ref_tables);
9765         if (is_eq_ref)
9766           *eq_ref_tables|= map;
9767         else
9768           *eq_ref_tables&= ~map;
9769         *cached_eq_ref_tables|= map; // now there exists a cached bit
9770       }
9771       if (!is_eq_ref)
9772         return false;
9773     }
9774   }
9775   return true;
9776 }
9777 
9778 
9779 /**
9780   Check if an expression in ORDER BY or GROUP BY is a duplicate of a
9781   preceding expression.
9782 
9783   @param  first_order   the first expression in the ORDER BY or
9784                         GROUP BY clause
9785   @param  possible_dup  the expression that might be a duplicate of
9786                         another expression preceding it the ORDER BY
9787                         or GROUP BY clause
9788 
9789   @returns true if possible_dup is a duplicate, false otherwise
9790 */
duplicate_order(const ORDER * first_order,const ORDER * possible_dup)9791 static bool duplicate_order(const ORDER *first_order,
9792                             const ORDER *possible_dup)
9793 {
9794   const ORDER *order;
9795   for (order=first_order; order ; order=order->next)
9796   {
9797     if (order == possible_dup)
9798     {
9799       // all expressions preceding possible_dup have been checked.
9800       return false;
9801     }
9802     else
9803     {
9804       const Item *it1= order->item[0]->real_item();
9805       const Item *it2= possible_dup->item[0]->real_item();
9806 
9807       if (it1->eq(it2, 0))
9808         return true;
9809     }
9810   }
9811   return false;
9812 }
9813 
/**
  Remove all constants and check if ORDER only contains simple
  expressions.

  simple_order is set to 1 if sort_order only uses fields from head table
  and the head table is not a LEFT JOIN table.

  An element of the clause is left out of the resulting list when it
   - only refers to constant tables,
   - duplicates an element that precedes it in the clause,
   - is reported by const_expression_in_where() for the given condition, or
   - does not use the table at best_ref[const_tables] and
     only_eq_ref_tables() succeeds for the other non-const tables it uses.

  If plan_is_const(), the function returns at once: NULL when change_list
  is set, first_order otherwise. *simple_order is not written on that path.

  @param first_order            List of SORT or GROUP order
  @param cond                   WHERE statement
  @param change_list            Set to 1 if we should remove things from list.
                                If this is not set, then only simple_order is
                                calculated.
  @param simple_order           Set to 1 if we are only using simple expressions
  @param clause_type            "ORDER BY" etc for printing in optimizer trace

  @return
    Returns new sort order. If change_list is set and no element was kept,
    this is NULL.
*/

ORDER *JOIN::remove_const(ORDER *first_order, Item *cond, bool change_list,
                          bool *simple_order, const char *clause_type)
{
  ASSERT_BEST_REF_IN_JOIN_ORDER(this);

  /* Plain return: DBUG_ENTER has not been executed yet at this point */
  if (plan_is_const())
    return change_list ? 0 : first_order;		// No need to sort

  Opt_trace_context * const trace= &thd->opt_trace;
  /*
    NOTE(review): presumably turns tracing off for this scope when there is
    no clause to process -- confirm against Opt_trace_disable_I_S.
  */
  Opt_trace_disable_I_S trace_disabled(trace, first_order == NULL);
  Opt_trace_object trace_wrapper(trace);
  Opt_trace_object trace_simpl(trace, "clause_processing");
  if (trace->is_started())
  {
    /* Record the clause as it looks before any element is removed */
    trace_simpl.add_alnum("clause", clause_type);
    String str;
    st_select_lex::print_order(&str, first_order,
                               enum_query_type(QT_TO_SYSTEM_CHARSET |
                                               QT_SHOW_SELECT_NUMBER |
                                               QT_NO_DEFAULT_DB));
    trace_simpl.add_utf8("original_clause", str.ptr(), str.length());
  }
  Opt_trace_array trace_each_item(trace, "items");

  /*
    prev_ptr addresses the link (initially first_order itself, afterwards
    the 'next' member of the last kept element) that receives the next
    kept element.
  */
  ORDER *order,**prev_ptr;
  JOIN_TAB *const first_tab= best_ref[const_tables];
  table_map first_table= first_tab->table_ref->map();
  table_map not_const_tables= ~const_table_map;
  table_map ref;
  // Caches to avoid repeating eq_ref_table() calls, @see eq_ref_table()
  table_map eq_ref_tables= 0, cached_eq_ref_tables= 0;
  DBUG_ENTER("JOIN::remove_const");

  prev_ptr= &first_order;
  /* Start out as "simple" only if the head table has no join condition */
  *simple_order= !first_tab->join_cond();

  /* NOTE: A variable of not_const_tables ^ first_table; breaks gcc 2.7 */

  update_depend_map(first_order);
  /*
    Every 'continue' below skips the bookkeeping at the bottom of the loop
    body, i.e. the current element is not kept.
  */
  for (order=first_order; order ; order=order->next)
  {
    /* One trace object per element; it is closed at the end of each pass */
    Opt_trace_object trace_one_item(trace);
    trace_one_item.add("item", order->item[0]);
    table_map order_tables=order->item[0]->used_tables();
    if (order->item[0]->with_sum_func ||
        /*
          If the outer table of an outer join is const (either by itself or
          after applying WHERE condition), grouping on a field from such a
          table will be optimized away and filesort without temporary table
          will be used unless we prevent that now. Filesort is not fit to
          handle joins and the join condition is not applied. We can't detect
          the case without an expensive test, however, so we force temporary
          table for all queries containing more than one table, ROLLUP, and an
          outer join.
         */
        (primary_tables > 1 &&
         rollup.state == ROLLUP::STATE_INITED &&
         select_lex->outer_join))
      *simple_order= 0;                // Must do a temp table to sort
    else if (!(order_tables & not_const_tables))
    {
      /* The element uses no non-const table */
      if (order->item[0]->has_subquery())
      {
        /* The subquery is evaluated here, except under EXPLAIN */
        if (!thd->lex->is_explain())
        {
          Opt_trace_array trace_subselect(trace, "subselect_evaluation");
          order->item[0]->val_str(&order->item[0]->str_value);
        }
        order->item[0]->mark_subqueries_optimized_away();
      }
      trace_one_item.add("uses_only_constant_tables", true);
      continue;                        // skip const item
    }
    else if (duplicate_order(first_order, order))
    {
      /*
        If 'order' is a duplicate of an expression earlier in the
        ORDER/GROUP BY sequence, it can be removed from the ORDER BY
        or GROUP BY clause.
      */
      trace_one_item.add("duplicate_item", true);
      continue;
    }
    else if (order->in_field_list && order->item[0]->has_subquery())
      /*
        If the order item is a subquery that is also in the field
        list, a temp table should be used to avoid evaluating the
        subquery for each row both when a) creating a sort index and
        b) getting the value.
          Example: "SELECT (SELECT ... ) as a ... GROUP BY a;"
       */
      *simple_order= false;
    else
    {
      if (order_tables & (RAND_TABLE_BIT | OUTER_REF_TABLE_BIT))
	*simple_order=0;
      else
      {
	if (cond && const_expression_in_where(cond,order->item[0]))
	{
          trace_one_item.add("equals_constant_in_where", true);
	  continue;
	}
	/* ref: the non-const tables other than the head table used here */
	if ((ref=order_tables & (not_const_tables ^ first_table)))
	{
	  if (!(order_tables & first_table) &&
              only_eq_ref_tables(this, first_order, ref,
                                 &cached_eq_ref_tables, &eq_ref_tables))
	  {
            trace_one_item.add("eq_ref_to_preceding_items", true);
	    continue;
	  }
	  *simple_order=0;			// Must do a temp table to sort
	}
      }
    }
    /*
      The element is kept. The list is relinked only when change_list is
      set, but prev_ptr advances in both modes so that "nothing kept" can
      be detected after the loop.
    */
    if (change_list)
      *prev_ptr= order;				// use this entry
    prev_ptr= &order->next;
  }
  /* Terminate the list; this clears first_order itself if nothing was kept */
  if (change_list)
    *prev_ptr=0;
  if (prev_ptr == &first_order)			// Nothing to sort/group
    *simple_order=1;
  DBUG_PRINT("exit",("simple_order: %d",(int) *simple_order));

  /* Close the "items" array before adding further members to trace_simpl */
  trace_each_item.end();
  trace_simpl.add("resulting_clause_is_simple", *simple_order);
  if (trace->is_started() && change_list)
  {
    String str;
    st_select_lex::print_order(&str, first_order,
                               enum_query_type(QT_TO_SYSTEM_CHARSET |
                                               QT_SHOW_SELECT_NUMBER |
                                               QT_NO_DEFAULT_DB));
    trace_simpl.add_utf8("resulting_clause", str.ptr(), str.length());
  }

  DBUG_RETURN(first_order);
}
9973 
9974 
/**
  Optimize conditions by

     a) applying transitivity to build multiple equality predicates
        (MEP): if x=y and y=z the MEP x=y=z is built.
     b) apply constants where possible. If the value of x is known to be
        42, x is replaced with a constant of value 42. By transitivity, this
        also applies to MEPs, so the MEP in a) will become 42=x=y=z.
     c) remove conditions that are always false or always true

  Step a) is run only when join_list is non-NULL. Steps b) and c) are each
  run only if the condition is non-NULL at that point. Every step that is
  run is reported as one element of the "steps" array in the optimizer
  trace.

  @param thd              Thread handler
  @param[in,out] cond     WHERE or HAVING condition to optimize
  @param[out] cond_equal  The built multiple equalities
  @param join_list        list of join operations with join conditions
                          = NULL: Called for HAVING condition
  @param[out] cond_value  Not changed if cond was empty
                            COND_TRUE if cond is always true
                            COND_FALSE if cond is impossible
                            COND_OK otherwise

  @returns false if success, true if error
*/

bool optimize_cond(THD *thd, Item **cond, COND_EQUAL **cond_equal,
                   List<TABLE_LIST> *join_list,
                   Item::cond_result *cond_value)
{
  Opt_trace_context * const trace= &thd->opt_trace;
  DBUG_ENTER("optimize_cond");

  Opt_trace_object trace_wrapper(trace);
  Opt_trace_object trace_cond(trace, "condition_processing");
  trace_cond.add_alnum("condition", join_list ? "WHERE" : "HAVING");
  trace_cond.add("original_condition", *cond);
  Opt_trace_array trace_steps(trace, "steps");

  /*
    Enter this function
    a) For a WHERE condition or a query having outer join.
    b) For a HAVING condition.
  */
  assert(*cond || join_list);

  /*
    Build all multiple equality predicates and eliminate equality
    predicates that can be inferred from these multiple equalities.
    For each reference of a field included into a multiple equality
    that occurs in a function set a pointer to the multiple equality
    predicate. Substitute a constant instead of this field if the
    multiple equality contains a constant.
    This is performed for the WHERE condition and any join conditions, but
    not for the HAVING condition.
  */
  if (join_list)
  {
    Opt_trace_object step_wrapper(trace);
    step_wrapper.add_alnum("transformation", "equality_propagation");
    /*
      The inner block makes disable_trace_wrapper and trace_subselect go out
      of scope before "resulting_condition" is added to step_wrapper.
      NOTE(review): presumably the trace is disabled here unless the
      condition contains a subquery -- confirm against
      Opt_trace_disable_I_S.
    */
    {
      Opt_trace_disable_I_S
        disable_trace_wrapper(trace, !(*cond && (*cond)->has_subquery()));
      Opt_trace_array
        trace_subselect(trace, "subselect_evaluation");
      if (build_equal_items(thd, *cond, cond, NULL, true,
                            join_list, cond_equal))
        DBUG_RETURN(true);
    }
    step_wrapper.add("resulting_condition", *cond);
  }
  /* change field = field to field = const for each found field = const */
  if (*cond)
  {
    Opt_trace_object step_wrapper(trace);
    step_wrapper.add_alnum("transformation", "constant_propagation");
    /* Same scoping of the trace helpers as in the previous step */
    {
      Opt_trace_disable_I_S
        disable_trace_wrapper(trace, !(*cond)->has_subquery());
      Opt_trace_array trace_subselect(trace, "subselect_evaluation");
      if (propagate_cond_constants(thd, NULL, *cond, *cond))
        DBUG_RETURN(true);
    }
    step_wrapper.add("resulting_condition", *cond);
  }

  /*
    Remove all instances of item == item
    Remove all and-levels where CONST item != CONST item
  */
  DBUG_EXECUTE("where",print_where(*cond,"after const change", QT_ORDINARY););
  if (*cond)
  {
    Opt_trace_object step_wrapper(trace);
    step_wrapper.add_alnum("transformation", "trivial_condition_removal");
    /* Same scoping of the trace helpers as in the previous steps */
    {
      Opt_trace_disable_I_S
        disable_trace_wrapper(trace, !(*cond)->has_subquery());
      Opt_trace_array trace_subselect(trace, "subselect_evaluation");
      /* This is the only place in this function that passes on cond_value */
      if (remove_eq_conds(thd, *cond, cond, cond_value))
        DBUG_RETURN(true);
    }
    step_wrapper.add("resulting_condition", *cond);
  }
  /*
    No error is expected at this point. The explicit test after the
    assertion still reports an error to the caller in builds where assert()
    is compiled out.
  */
  assert(!thd->is_error());
  if (thd->is_error())
    DBUG_RETURN(true);
  DBUG_RETURN(false);
}
10081 
10082 
/**
  Handle the recursive job for remove_eq_conds()

  The condition is handled according to its kind:
   - AND/OR (Item::COND_ITEM): every argument is processed recursively.
     Arguments that come back as NULL are removed from the argument list
     and arguments that come back changed are replaced in it.
   - IS NULL on a field: for NOT NULL DATE/DATETIME columns the predicate
     is first rewritten (see the comment in the code); if the predicate is
     constant, it is evaluated.
   - any other constant condition that is not expensive: it is evaluated.
   - anything else: eq_cmp_result() decides whether the condition is a
     comparison whose two arguments should be checked for being identical.

  @param thd             Thread handler
  @param cond            the condition to handle.
  @param[out] retcond    Modified condition after removal. NULL if the
                         condition was removed.
  @param[out] cond_value the resulting value of the condition. For an
                         AND/OR with an empty argument list this is left
                         at COND_UNDEF.

  @see remove_eq_conds() for more details on arguments

  @returns false if success, true if error
*/

static bool internal_remove_eq_conds(THD *thd, Item *cond,
                                     Item **retcond,
                                     Item::cond_result *cond_value)
{
  if (cond->type() == Item::COND_ITEM)
  {
    Item_cond *const item_cond= down_cast<Item_cond *>(cond);
    const bool and_level= item_cond->functype() == Item_func::COND_AND_FUNC;
    List_iterator<Item> li(*item_cond->argument_list());
    /* Set when an argument was replaced, so used tables must be refreshed */
    bool should_fix_fields= false;

    *cond_value=Item::COND_UNDEF;
    Item *item;
    while ((item=li++))
    {
      Item *new_item;
      Item::cond_result tmp_cond_value;
      if (internal_remove_eq_conds(thd, item, &new_item, &tmp_cond_value))
        return true;

      if (new_item == NULL)
        li.remove();
      else if (item != new_item)
      {
        (void) li.replace(new_item);
        should_fix_fields= true;
      }
      /* The first argument seeds the accumulated value */
      if (*cond_value == Item::COND_UNDEF)
         *cond_value= tmp_cond_value;
      switch (tmp_cond_value)
      {
      case Item::COND_OK:                       // Not TRUE or FALSE
        if (and_level || *cond_value == Item::COND_FALSE)
          *cond_value= tmp_cond_value;
        break;
      case Item::COND_FALSE:
        if (and_level)                          // Always false
        {
          *cond_value= tmp_cond_value;
          *retcond= NULL;
          return false;
        }
        break;
      case Item::COND_TRUE:
        if (!and_level)                         // Always true
        {
          *cond_value= tmp_cond_value;
          *retcond= NULL;
          return false;
        }
        break;
      case Item::COND_UNDEF:			// Impossible
        assert(false);                     /* purecov: deadcode */
      }
    }
    if (should_fix_fields)
      item_cond->update_used_tables();

    /* Nothing left to evaluate, or the value of the AND/OR is known */
    if (item_cond->argument_list()->elements == 0 ||
        *cond_value != Item::COND_OK)
    {
      *retcond= NULL;
      return false;
    }
    if (item_cond->argument_list()->elements == 1)
    {
      /*
        BUG#11765699:
        We're dealing with an AND or OR item that has only one
        argument. However, it is not an option to empty the list
        because:

         - this function is called for either JOIN::conds or
           JOIN::having, but these point to the same condition as
           SELECT_LEX::where and SELECT_LEX::having do.

         - The return value of remove_eq_conds() is assigned to
           JOIN::conds and JOIN::having, so emptying the list and
           returning the only remaining item "replaces" the AND or OR
           with item for the variables in JOIN. However, the return
           value is not assigned to the SELECT_LEX counterparts. Thus,
           if argument_list is emptied, SELECT_LEX forgets the item in
           argument_list()->head().

        item is therefore returned, but argument_list is not emptied.
      */
      item= item_cond->argument_list()->head();
      /*
        Consider reenabling the line below when the optimizer has been
        split into properly separated phases.

        item_cond->argument_list()->empty();
      */
      *retcond= item;
      return false;
    }
    /*
      Two or more arguments remain: fall through to the end of the
      function, which returns cond itself with COND_OK.
    */
  }
  else if (cond->type() == Item::FUNC_ITEM &&
           down_cast<Item_func *>(cond)->functype() == Item_func::ISNULL_FUNC)
  {
    Item_func_isnull *const func= down_cast<Item_func_isnull *>(cond);
    Item **args= func->arguments();
    if (args[0]->type() == Item::FIELD_ITEM)
    {
      Field *const field= down_cast<Item_field *>(args[0])->field;
      /* fix to replace 'NULL' dates with '0' (shreeve@uci.edu) */
      /*
        See BUG#12594011
        Documentation says that
        SELECT datetime_notnull d FROM t1 WHERE d IS NULL
        shall return rows where d=='0000-00-00'

        Thus, for DATE and DATETIME columns defined as NOT NULL,
        "date_notnull IS NULL" has to be modified to
        "date_notnull IS NULL OR date_notnull == 0" (if outer join)
        "date_notnull == 0"                         (otherwise)

      */
      if (((field->type() == MYSQL_TYPE_DATE) ||
           (field->type() == MYSQL_TYPE_DATETIME)) &&
          (field->flags & NOT_NULL_FLAG))
      {
        Item *item0= new(thd->mem_root) Item_int((longlong)0, 1);
        if (item0 == NULL)
          return true;
        Item *eq_cond= new(thd->mem_root) Item_func_eq(args[0], item0);
        if (eq_cond == NULL)
          return true;

        if (args[0]->is_outer_field())
        {
          // outer join: transform "col IS NULL" to "col IS NULL or col=0"
          Item *or_cond= new(thd->mem_root) Item_cond_or(eq_cond, cond);
          if (or_cond == NULL)
            return true;
          cond= or_cond;
        }
        else
        {
          // not outer join: transform "col IS NULL" to "col=0"
          cond= eq_cond;
        }

        /* cond now refers to the newly built item, which must be fixed */
        if (cond->fix_fields(thd, &cond))
          return true;
      }
    }
    if (cond->const_item())
    {
      bool value;
      if (eval_const_cond(thd, cond, &value))
        return true;
      *cond_value= value ? Item::COND_TRUE : Item::COND_FALSE;
      *retcond= NULL;
      return false;
    }
    /*
      Not constant: fall through to the end of the function, which returns
      the (possibly rewritten) cond with COND_OK.
    */
  }
  else if (cond->const_item() && !cond->is_expensive())
  {
    bool value;
    if (eval_const_cond(thd, cond, &value))
      return true;
    *cond_value= value ? Item::COND_TRUE : Item::COND_FALSE;
    *retcond= NULL;
    return false;
  }
  else
  {                                             // boolean compare function
    *cond_value= cond->eq_cmp_result();
    if (*cond_value == Item::COND_OK)
    {
      *retcond= cond;
      return false;
    }
    /*
      NOTE(review): the casts below assume that only Item_func objects with
      at least two arguments report an eq_cmp_result() other than COND_OK
      -- confirm.
    */
    Item *left_item= down_cast<Item_func *>(cond)->arguments()[0];
    Item *right_item= down_cast<Item_func *>(cond)->arguments()[1];
    if (left_item->eq(right_item,1))
    {
      /*
        The comparison is removed, and *cond_value keeps the value obtained
        from eq_cmp_result(), only if the argument cannot be NULL or the
        function is EQUAL_FUNC.
      */
      if (!left_item->maybe_null ||
          down_cast<Item_func *>(cond)->functype() == Item_func::EQUAL_FUNC)
      {
        *retcond= NULL;
        return false;                           // Compare of identical items
      }
    }
  }
  /* Common exit for every case where the condition has to be kept */
  *cond_value= Item::COND_OK;
  *retcond= cond;                               // Point at next and level
  return false;
}
10286 
10287 
10288 /**
10289   Remove const and eq items. Return new item, or NULL if no condition
10290 
10291   @param      thd        thread handler
10292   @param      cond       the condition to handle
10293   @param[out] retcond    condition after const removal
10294   @param[out] cond_value resulting value of the condition
10295               =COND_OK    condition must be evaluated (e.g field = constant)
10296               =COND_TRUE  always true                 (e.g 1 = 1)
10297               =COND_FALSE always false                (e.g 1 = 2)
10298 
10299   @note calls internal_remove_eq_conds() to check the complete tree.
10300 
10301   @returns false if success, true if error
10302 */
10303 
remove_eq_conds(THD * thd,Item * cond,Item ** retcond,Item::cond_result * cond_value)10304 bool remove_eq_conds(THD *thd, Item *cond, Item **retcond,
10305                      Item::cond_result *cond_value)
10306 {
10307   if (cond->type() == Item::FUNC_ITEM &&
10308       down_cast<Item_func *>(cond)->functype() == Item_func::ISNULL_FUNC)
10309   {
10310     /*
10311       Handles this special case for some ODBC applications:
10312       The are requesting the row that was just updated with a auto_increment
10313       value with this construct:
10314 
10315       SELECT * from table_name where auto_increment_column IS NULL
10316       This will be changed to:
10317       SELECT * from table_name where auto_increment_column = LAST_INSERT_ID
10318     */
10319 
10320     Item_func_isnull *const func= down_cast<Item_func_isnull *>(cond);
10321     Item **args= func->arguments();
10322     if (args[0]->type() == Item::FIELD_ITEM)
10323     {
10324       Field *const field= down_cast<Item_field *>(args[0])->field;
10325       if ((field->flags & AUTO_INCREMENT_FLAG) &&
10326           !field->table->is_nullable() &&
10327 	  (thd->variables.option_bits & OPTION_AUTO_IS_NULL) &&
10328 	  (thd->first_successful_insert_id_in_prev_stmt > 0 &&
10329            thd->substitute_null_with_insert_id))
10330       {
10331         query_cache.abort(&thd->query_cache_tls);
10332 
10333         cond= new Item_func_eq(
10334                 args[0],
10335                 new Item_int(NAME_STRING("last_insert_id()"),
10336                             thd->read_first_successful_insert_id_in_prev_stmt(),
10337                              MY_INT64_NUM_DECIMAL_DIGITS));
10338         if (cond == NULL)
10339           return true;
10340 
10341         if (cond->fix_fields(thd, &cond))
10342           return true;
10343 
10344         /*
10345           IS NULL should be mapped to LAST_INSERT_ID only for first row, so
10346           clear for next row
10347         */
10348         thd->substitute_null_with_insert_id= FALSE;
10349 
10350         *cond_value= Item::COND_OK;
10351         *retcond= cond;
10352         return false;
10353       }
10354     }
10355   }
10356   return internal_remove_eq_conds(thd, cond, retcond, cond_value);
10357 }
10358 
10359 
10360 /**
10361   Check if GROUP BY/DISTINCT can be optimized away because the set is
10362   already known to be distinct.
10363 
10364   Used in removing the GROUP BY/DISTINCT of the following types of
10365   statements:
10366   @code
10367     SELECT [DISTINCT] <unique_key_cols>... FROM <single_table_ref>
10368       [GROUP BY <unique_key_cols>,...]
10369   @endcode
10370 
10371     If (a,b,c is distinct)
10372     then <any combination of a,b,c>,{whatever} is also distinct
10373 
10374     This function checks if all the key parts of any of the unique keys
10375     of the table are referenced by a list : either the select list
10376     through find_field_in_item_list or GROUP BY list through
10377     find_field_in_order_list.
10378     If the above holds and the key parts cannot contain NULLs then we
10379     can safely remove the GROUP BY/DISTINCT,
10380     as no result set can be more distinct than an unique key.
10381 
  @param tab                  The join table to operate on.
  @param find_func            function to iterate over the list and search
                              for a field
  @param data                 the list to search; passed unchanged to
                              find_func as its second argument
10385 
10386   @retval
10387     1                    found
10388   @retval
10389     0                    not found.
10390 
10391   @note
10392     The function assumes that make_outerjoin_info() has been called in
10393     order for the check for outer tables to work.
10394 */
10395 
10396 static bool
list_contains_unique_index(JOIN_TAB * tab,bool (* find_func)(Field *,void *),void * data)10397 list_contains_unique_index(JOIN_TAB *tab,
10398                           bool (*find_func) (Field *, void *), void *data)
10399 {
10400   TABLE *table= tab->table();
10401 
10402   if (tab->is_inner_table_of_outer_join())
10403     return 0;
10404   for (uint keynr= 0; keynr < table->s->keys; keynr++)
10405   {
10406     if (keynr == table->s->primary_key ||
10407          (table->key_info[keynr].flags & HA_NOSAME))
10408     {
10409       KEY *keyinfo= table->key_info + keynr;
10410       KEY_PART_INFO *key_part, *key_part_end;
10411 
10412       for (key_part=keyinfo->key_part,
10413            key_part_end=key_part+ keyinfo->user_defined_key_parts;
10414            key_part < key_part_end;
10415            key_part++)
10416       {
10417         if (key_part->field->real_maybe_null() ||
10418             !find_func(key_part->field, data))
10419           break;
10420       }
10421       if (key_part == key_part_end)
10422         return 1;
10423     }
10424   }
10425   return 0;
10426 }
10427 
10428 
10429 /**
10430   Helper function for list_contains_unique_index.
10431   Find a field reference in a list of ORDER structures.
10432   Finds a direct reference of the Field in the list.
10433 
10434   @param field                The field to search for.
10435   @param data                 ORDER *.The list to search in
10436 
10437   @retval
10438     1                    found
10439   @retval
10440     0                    not found.
10441 */
10442 
10443 static bool
find_field_in_order_list(Field * field,void * data)10444 find_field_in_order_list (Field *field, void *data)
10445 {
10446   ORDER *group= (ORDER *) data;
10447   bool part_found= 0;
10448   for (ORDER *tmp_group= group; tmp_group; tmp_group=tmp_group->next)
10449   {
10450     Item *item= (*tmp_group->item)->real_item();
10451     if (item->type() == Item::FIELD_ITEM &&
10452         ((Item_field*) item)->field->eq(field))
10453     {
10454       part_found= 1;
10455       break;
10456     }
10457   }
10458   return part_found;
10459 }
10460 
10461 
10462 /**
10463   Helper function for list_contains_unique_index.
10464   Find a field reference in a dynamic list of Items.
10465   Finds a direct reference of the Field in the list.
10466 
10467   @param[in] field             The field to search for.
10468   @param[in] data              List<Item> *.The list to search in
10469 
10470   @retval
10471     1                    found
10472   @retval
10473     0                    not found.
10474 */
10475 
10476 static bool
find_field_in_item_list(Field * field,void * data)10477 find_field_in_item_list (Field *field, void *data)
10478 {
10479   List<Item> *fields= (List<Item> *) data;
10480   bool part_found= 0;
10481   List_iterator<Item> li(*fields);
10482   Item *item;
10483 
10484   while ((item= li++))
10485   {
10486     if (item->type() == Item::FIELD_ITEM &&
10487         ((Item_field*) item)->field->eq(field))
10488     {
10489       part_found= 1;
10490       break;
10491     }
10492   }
10493   return part_found;
10494 }
10495 
10496 
10497 /**
10498   Create a group by that consist of all non const fields.
10499 
10500   Try to use the fields in the order given by 'order' to allow one to
10501   optimize away 'order by'.
10502 */
10503 
10504 static ORDER *
create_distinct_group(THD * thd,Ref_ptr_array ref_pointer_array,ORDER * order_list,List<Item> & fields,List<Item> & all_fields,bool * all_order_by_fields_used)10505 create_distinct_group(THD *thd, Ref_ptr_array ref_pointer_array,
10506                       ORDER *order_list, List<Item> &fields,
10507                       List<Item> &all_fields,
10508 		      bool *all_order_by_fields_used)
10509 {
10510   List_iterator<Item> li(fields);
10511   Item *item;
10512   ORDER *order,*group,**prev;
10513 
10514   *all_order_by_fields_used= 1;
10515   while ((item=li++))
10516     item->marker=0;			/* Marker that field is not used */
10517 
10518   prev= &group;  group=0;
10519   for (order=order_list ; order; order=order->next)
10520   {
10521     if (order->in_field_list)
10522     {
10523       ORDER *ord=(ORDER*) thd->memdup((char*) order,sizeof(ORDER));
10524       if (!ord)
10525 	return 0;
10526       *prev=ord;
10527       prev= &ord->next;
10528       (*ord->item)->marker=1;
10529     }
10530     else
10531       *all_order_by_fields_used= 0;
10532   }
10533 
10534   li.rewind();
10535   while ((item=li++))
10536   {
10537     if (!item->const_item() && !item->with_sum_func && !item->marker)
10538     {
10539       /*
10540         Don't put duplicate columns from the SELECT list into the
10541         GROUP BY list.
10542       */
10543       ORDER *ord_iter;
10544       for (ord_iter= group; ord_iter; ord_iter= ord_iter->next)
10545         if ((*ord_iter->item)->eq(item, 1))
10546           goto next_item;
10547 
10548       ORDER *ord=(ORDER*) thd->mem_calloc(sizeof(ORDER));
10549       if (!ord)
10550 	return 0;
10551 
10552       if (item->type() == Item::FIELD_ITEM &&
10553           item->field_type() == MYSQL_TYPE_BIT)
10554       {
10555         /*
10556           Because HEAP tables can't index BIT fields we need to use an
10557           additional hidden field for grouping because later it will be
10558           converted to a LONG field. Original field will remain of the
10559           BIT type and will be returned to a client.
10560           @note setup_ref_array() needs to account for the extra space.
10561         */
10562         Item_field *new_item= new Item_field(thd, (Item_field*)item);
10563         ord->item= thd->lex->current_select()->add_hidden_item(new_item);
10564       }
10565       else
10566       {
10567         /*
10568           We have here only field_list (not all_field_list), so we can use
10569           simple indexing of ref_pointer_array (order in the array and in the
10570           list are same)
10571         */
10572         ord->item= &ref_pointer_array[0];
10573       }
10574       ord->direction= ORDER::ORDER_ASC;
10575       *prev=ord;
10576       prev= &ord->next;
10577     }
10578 next_item:
10579     ref_pointer_array.pop_front();
10580   }
10581   *prev=0;
10582   return group;
10583 }
10584 
10585 
10586 /**
10587   Return table number if there is only one table in sort order
10588   and group and order is compatible, else return 0.
10589 */
10590 
10591 static TABLE *
get_sort_by_table(ORDER * a,ORDER * b,TABLE_LIST * tables)10592 get_sort_by_table(ORDER *a,ORDER *b,TABLE_LIST *tables)
10593 {
10594   table_map map= (table_map) 0;
10595   DBUG_ENTER("get_sort_by_table");
10596 
10597   if (!a)
10598     a=b;					// Only one need to be given
10599   else if (!b)
10600     b=a;
10601 
10602   for (; a && b; a=a->next,b=b->next)
10603   {
10604     if (!(*a->item)->eq(*b->item,1))
10605       DBUG_RETURN(0);
10606     map|=a->item[0]->used_tables();
10607   }
10608   map&= ~PARAM_TABLE_BIT;
10609   if (!map || (map & (RAND_TABLE_BIT | OUTER_REF_TABLE_BIT)))
10610     DBUG_RETURN(0);
10611 
10612   for (; !(map & tables->map()); tables= tables->next_leaf) ;
10613   if (map != tables->map())
10614     DBUG_RETURN(0);				// More than one table
10615   DBUG_PRINT("exit",("sort by table: %d",tables->tableno()));
10616   DBUG_RETURN(tables->table);
10617 }
10618 
10619 
10620 /**
10621   Create a condition for a const reference for a table.
10622 
10623   @param thd      THD pointer
10624   @param join_tab pointer to the table
10625 
10626   @return A pointer to the created condition for the const reference.
10627   @retval !NULL if the condition was created successfully
10628   @retval NULL if an error has occured
10629 */
10630 
create_cond_for_const_ref(THD * thd,JOIN_TAB * join_tab)10631 static Item_cond_and *create_cond_for_const_ref(THD *thd, JOIN_TAB *join_tab)
10632 {
10633   DBUG_ENTER("create_cond_for_const_ref");
10634   assert(join_tab->ref().key_parts);
10635 
10636   TABLE *table= join_tab->table();
10637   Item_cond_and *cond= new Item_cond_and();
10638   if (!cond)
10639     DBUG_RETURN(NULL);
10640 
10641   for (uint i=0 ; i < join_tab->ref().key_parts ; i++)
10642   {
10643     Field *field= table->field[table->key_info[join_tab->ref().key].key_part[i].
10644                                fieldnr-1];
10645     Item *value= join_tab->ref().items[i];
10646     Item *item= new Item_field(field);
10647     if (!item)
10648       DBUG_RETURN(NULL);
10649     item= join_tab->ref().null_rejecting & ((key_part_map)1 << i) ?
10650             (Item *)new Item_func_eq(item, value) :
10651             (Item *)new Item_func_equal(item, value);
10652     if (!item)
10653       DBUG_RETURN(NULL);
10654     if (cond->add(item))
10655       DBUG_RETURN(NULL);
10656   }
10657   cond->fix_fields(thd, (Item**)&cond);
10658 
10659   DBUG_RETURN(cond);
10660 }
10661 
10662 /**
10663   Create a condition for a const reference and add this to the
10664   currenct select for the table.
10665 */
10666 
add_ref_to_table_cond(THD * thd,JOIN_TAB * join_tab)10667 static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab)
10668 {
10669   DBUG_ENTER("add_ref_to_table_cond");
10670   if (!join_tab->ref().key_parts)
10671     DBUG_RETURN(FALSE);
10672 
10673   int error= 0;
10674 
10675   /* Create a condition representing the const reference. */
10676   Item_cond_and *cond= create_cond_for_const_ref(thd, join_tab);
10677   if (!cond)
10678     DBUG_RETURN(TRUE);
10679 
10680   /* Add this condition to the existing select condtion */
10681   if (join_tab->condition())
10682   {
10683     error=(int) cond->add(join_tab->condition());
10684     cond->update_used_tables();
10685   }
10686   join_tab->set_condition(cond);
10687   Opt_trace_object(&thd->opt_trace).add("added_back_ref_condition", cond);
10688 
10689   DBUG_RETURN(error ? TRUE : FALSE);
10690 }
10691 
10692 
10693 /**
10694   Remove additional condition inserted by IN/ALL/ANY transformation.
10695 
10696   @param conds   condition for processing
10697 
10698   @return
10699     new conditions
10700 
10701   @note that this function has Bug#13915291.
10702 */
10703 
remove_additional_cond(Item * conds)10704 static Item *remove_additional_cond(Item* conds)
10705 {
10706   // Because it uses in_additional_cond it applies only to the scalar case.
10707   if (conds->item_name.ptr() == in_additional_cond)
10708     return 0;
10709   if (conds->type() == Item::COND_ITEM)
10710   {
10711     Item_cond *cnd= (Item_cond*) conds;
10712     List_iterator<Item> li(*(cnd->argument_list()));
10713     Item *item;
10714     while ((item= li++))
10715     {
10716       if (item->item_name.ptr() == in_additional_cond)
10717       {
10718 	li.remove();
10719 	if (cnd->argument_list()->elements == 1)
10720 	  return cnd->argument_list()->head();
10721 	return conds;
10722       }
10723     }
10724   }
10725   return conds;
10726 }
10727 
10728 
10729 /**
10730   Update some values in keyuse for faster choose_table_order() loop.
10731 
10732   @todo Check if this is the real meaning of ref_table_rows.
10733 
10734   @param keyuse_array  Array of Key_use elements being updated.
10735 
10736 
10737 */
10738 
optimize_keyuse()10739 void JOIN::optimize_keyuse()
10740 {
10741   for (size_t ix= 0; ix < keyuse_array.size(); ++ix)
10742   {
10743     Key_use *keyuse= &keyuse_array.at(ix);
10744     table_map map;
10745     /*
10746       If we find a ref, assume this table matches a proportional
10747       part of this table.
10748       For example 100 records matching a table with 5000 records
10749       gives 5000/100 = 50 records per key
10750       Constant tables are ignored.
10751       To avoid bad matches, we don't make ref_table_rows less than 100.
10752     */
10753     keyuse->ref_table_rows= ~(ha_rows) 0;	// If no ref
10754     if (keyuse->used_tables &
10755        (map= (keyuse->used_tables & ~const_table_map & ~PSEUDO_TABLE_BITS)))
10756     {
10757       uint tableno;
10758       for (tableno= 0; ! (map & 1) ; map>>=1, tableno++)
10759       {}
10760       if (map == 1)			// Only one table
10761       {
10762 	TABLE *tmp_table= join_tab[tableno].table();
10763 
10764 	keyuse->ref_table_rows= max<ha_rows>(tmp_table->file->stats.records, 100);
10765       }
10766     }
10767     /*
10768       Outer reference (external field) is constant for single executing
10769       of subquery
10770     */
10771     if (keyuse->used_tables == OUTER_REF_TABLE_BIT)
10772       keyuse->ref_table_rows= 1;
10773   }
10774 }
10775 
10776 /**
10777   Function sets FT hints, initializes FT handlers
10778   and checks if FT index can be used as covered.
10779 */
10780 
optimize_fts_query()10781 bool JOIN::optimize_fts_query()
10782 {
10783   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
10784 
10785   assert(select_lex->has_ft_funcs());
10786 
10787   for (uint i= const_tables; i < tables; i++)
10788   {
10789     JOIN_TAB *tab= best_ref[i];
10790     if (tab->type() != JT_FT)
10791       continue;
10792 
10793     Item_func_match *ifm;
10794     Item_func_match* ft_func=
10795       static_cast<Item_func_match*>(tab->position()->key->val);
10796     List_iterator<Item_func_match> li(*(select_lex->ftfunc_list));
10797 
10798     while ((ifm= li++))
10799     {
10800       if (!(ifm->used_tables() & tab->table_ref->map()) || ifm->master)
10801         continue;
10802 
10803       if (ifm != ft_func)
10804       {
10805         if (ifm->can_skip_ranking())
10806           ifm->set_hints(this, FT_NO_RANKING, HA_POS_ERROR, false);
10807       }
10808     }
10809 
10810     /*
10811       Check if internal sorting is needed. FT_SORTED flag is set
10812       if no ORDER BY clause or ORDER BY MATCH function is the same
10813       as the function that is used for FT index and FT table is
10814       the first non-constant table in the JOIN.
10815     */
10816     if (i == const_tables &&
10817         !(ft_func->get_hints()->get_flags() & FT_BOOL) &&
10818         (!order || ft_func == test_if_ft_index_order(order)))
10819       ft_func->set_hints(this, FT_SORTED, m_select_limit, false);
10820 
10821     /*
10822       Check if ranking is not needed. FT_NO_RANKING flag is set if
10823       MATCH function is used only in WHERE condition and  MATCH
10824       function is not part of an expression.
10825     */
10826     if (ft_func->can_skip_ranking())
10827       ft_func->set_hints(this, FT_NO_RANKING,
10828                          !order ? m_select_limit : HA_POS_ERROR, false);
10829   }
10830 
10831   return init_ftfuncs(thd, select_lex);
10832 }
10833 
10834 
10835 /**
10836   Check if FTS index only access is possible.
10837 
10838   @param tab  pointer to JOIN_TAB structure.
10839 
10840   @return  TRUE if index only access is possible,
10841            FALSE otherwise.
10842 */
10843 
fts_index_access(JOIN_TAB * tab)10844 bool JOIN::fts_index_access(JOIN_TAB *tab)
10845 {
10846   assert(tab->type() == JT_FT);
10847   TABLE *table= tab->table();
10848 
10849   if ((table->file->ha_table_flags() & HA_CAN_FULLTEXT_EXT) == 0)
10850     return false; // Optimizations requires extended FTS support by table engine
10851 
10852   /*
10853     This optimization does not work with filesort nor GROUP BY
10854   */
10855   if (grouped || (order && ordered_index_usage != ordered_index_order_by))
10856     return false;
10857 
10858   /*
10859     Check whether the FTS result is covering.  If only document id
10860     and rank is needed, there is no need to access table rows.
10861   */
10862   for (uint i= bitmap_get_first_set(table->read_set);
10863        i < table->s->fields;
10864        i= bitmap_get_next_set(table->read_set, i))
10865   {
10866     if (table->field[i] != table->fts_doc_id_field ||
10867         !tab->ft_func()->docid_in_result())
10868     return false;
10869   }
10870 
10871   return true;
10872 }
10873 
10874 
10875 /**
10876    For {semijoin,subquery} materialization: calculates various cost
10877    information, based on a plan in join->best_positions covering the
10878    to-be-materialized query block and only this.
10879 
10880    @param join     JOIN where plan can be found
10881    @param sj_nest  sj materialization nest (NULL if subquery materialization)
10882    @param n_tables number of to-be-materialized tables
10883    @param[out] sjm where computed costs will be stored
10884 
10885    @note that this function modifies join->map2table, which has to be filled
10886    correctly later.
10887 */
calculate_materialization_costs(JOIN * join,TABLE_LIST * sj_nest,uint n_tables,Semijoin_mat_optimize * sjm)10888 static void calculate_materialization_costs(JOIN *join,
10889                                             TABLE_LIST *sj_nest,
10890                                             uint n_tables,
10891                                             Semijoin_mat_optimize *sjm)
10892 {
10893   double mat_cost;             // Estimated cost of materialization
10894   double mat_rowcount;         // Estimated row count before duplicate removal
10895   double distinct_rowcount;    // Estimated rowcount after duplicate removal
10896   List<Item> *inner_expr_list;
10897 
10898   if (sj_nest)
10899   {
10900     /*
10901       get_partial_join_cost() assumes a regular join, which is correct when
10902       we optimize a sj-materialization nest (always executed as regular
10903       join).
10904     */
10905     get_partial_join_cost(join, n_tables, &mat_cost, &mat_rowcount);
10906     n_tables+= join->const_tables;
10907     inner_expr_list= &sj_nest->nested_join->sj_inner_exprs;
10908   }
10909   else
10910   {
10911     mat_cost= join->best_read;
10912     mat_rowcount= static_cast<double>(join->best_rowcount);
10913     inner_expr_list= &join->select_lex->item_list;
10914   }
10915 
10916   /*
10917     Adjust output cardinality estimates. If the subquery has form
10918 
10919     ... oe IN (SELECT t1.colX, t2.colY, func(X,Y,Z) )
10920 
10921     then the number of distinct output record combinations has an
10922     upper bound of product of number of records matching the tables
10923     that are used by the SELECT clause.
10924     TODO:
10925     We can get a more precise estimate if we
10926      - use rec_per_key cardinality estimates. For simple cases like
10927      "oe IN (SELECT t.key ...)" it is trivial.
10928      - Functional dependencies between the tables in the semi-join
10929      nest (the payoff is probably less here?)
10930   */
10931   {
10932     for (uint i=0 ; i < n_tables ; i++)
10933     {
10934       JOIN_TAB * const tab= join->best_positions[i].table;
10935       join->map2table[tab->table_ref->tableno()]= tab;
10936     }
10937     List_iterator<Item> it(*inner_expr_list);
10938     Item *item;
10939     table_map map= 0;
10940     while ((item= it++))
10941       map|= item->used_tables();
10942     map&= ~PSEUDO_TABLE_BITS;
10943     Table_map_iterator tm_it(map);
10944     int tableno;
10945     double rows= 1.0;
10946     while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
10947       rows*= join->map2table[tableno]->table()->quick_condition_rows;
10948     distinct_rowcount= min(mat_rowcount, rows);
10949   }
10950   /*
10951     Calculate temporary table parameters and usage costs
10952   */
10953   const uint rowlen= get_tmp_table_rec_length(*inner_expr_list);
10954 
10955   const Cost_model_server *cost_model= join->cost_model();
10956 
10957   Cost_model_server::enum_tmptable_type tmp_table_type;
10958   if (rowlen * distinct_rowcount < join->thd->variables.max_heap_table_size)
10959     tmp_table_type= Cost_model_server::MEMORY_TMPTABLE;
10960   else
10961     tmp_table_type= Cost_model_server::DISK_TMPTABLE;
10962 
10963   /*
10964     Let materialization cost include the cost to create the temporary
10965     table and write the rows into it:
10966   */
10967   mat_cost+= cost_model->tmptable_create_cost(tmp_table_type);
10968   mat_cost+= cost_model->tmptable_readwrite_cost(tmp_table_type, mat_rowcount,
10969                                                  0.0);
10970 
10971   sjm->materialization_cost.reset();
10972   sjm->materialization_cost.add_io(mat_cost);
10973 
10974   sjm->expected_rowcount= distinct_rowcount;
10975 
10976   /*
10977     Set the cost to do a full scan of the temptable (will need this to
10978     consider doing sjm-scan):
10979   */
10980   sjm->scan_cost.reset();
10981   if (distinct_rowcount > 0.0)
10982   {
10983     const double scan_cost=
10984       cost_model->tmptable_readwrite_cost(tmp_table_type,
10985                                           0.0, distinct_rowcount);
10986     sjm->scan_cost.add_io(scan_cost);
10987   }
10988 
10989   // The cost to lookup a row in temp. table
10990   const double row_cost= cost_model->tmptable_readwrite_cost(tmp_table_type,
10991                                                              0.0, 1.0);
10992   sjm->lookup_cost.reset();
10993   sjm->lookup_cost.add_io(row_cost);
10994 }
10995 
10996 
10997 /**
10998    Decides between EXISTS and materialization; performs last steps to set up
10999    the chosen strategy.
11000    @returns 'false' if no error
11001 
11002    @note If UNION this is called on each contained JOIN.
11003 
11004  */
decide_subquery_strategy()11005 bool JOIN::decide_subquery_strategy()
11006 {
11007   assert(unit->item);
11008 
11009   switch (unit->item->substype())
11010   {
11011   case Item_subselect::IN_SUBS:
11012   case Item_subselect::ALL_SUBS:
11013   case Item_subselect::ANY_SUBS:
11014     // All of those are children of Item_in_subselect and may use EXISTS
11015     break;
11016   default:
11017     return false;
11018   }
11019 
11020   Item_in_subselect * const in_pred=
11021     static_cast<Item_in_subselect *>(unit->item);
11022 
11023   Item_exists_subselect::enum_exec_method chosen_method= in_pred->exec_method;
11024   // Materialization does not allow UNION so this can't happen:
11025   assert(chosen_method != Item_exists_subselect::EXEC_MATERIALIZATION);
11026 
11027   if ((chosen_method == Item_exists_subselect::EXEC_EXISTS_OR_MAT) &&
11028       compare_costs_of_subquery_strategies(&chosen_method))
11029     return true;
11030 
11031   switch (chosen_method)
11032   {
11033   case Item_exists_subselect::EXEC_EXISTS:
11034     return in_pred->finalize_exists_transform(select_lex);
11035   case Item_exists_subselect::EXEC_MATERIALIZATION:
11036     return in_pred->finalize_materialization_transform(this);
11037   default:
11038     assert(false);
11039     return true;
11040   }
11041 }
11042 
11043 
11044 /**
11045    Tells what is the cheapest between IN->EXISTS and subquery materialization,
11046    in terms of cost, for the subquery's JOIN.
11047    Input:
11048    - join->{best_positions,best_read,best_rowcount} must contain the
11049    execution plan of EXISTS (where 'join' is the subquery's JOIN)
11050    - join2->{best_positions,best_read,best_rowcount} must be correctly set
11051    (where 'join2' is the parent join, the grandparent join, etc).
11052    Output:
11053    join->{best_positions,best_read,best_rowcount} contain the cheapest
11054    execution plan (where 'join' is the subquery's JOIN).
11055 
11056    This plan choice has to happen before calling functions which set up
11057    execution structures, like JOIN::get_best_combination().
11058 
11059    @param[out] method  chosen method (EXISTS or materialization) will be put
11060                        here.
11061    @returns false if success
11062 */
compare_costs_of_subquery_strategies(Item_exists_subselect::enum_exec_method * method)11063 bool JOIN::compare_costs_of_subquery_strategies(
11064                Item_exists_subselect::enum_exec_method *method)
11065 {
11066   *method= Item_exists_subselect::EXEC_EXISTS;
11067 
11068   Item_exists_subselect::enum_exec_method allowed_strategies=
11069     select_lex->subquery_strategy(thd);
11070 
11071   if (allowed_strategies == Item_exists_subselect::EXEC_EXISTS)
11072     return false;
11073 
11074   assert(allowed_strategies == Item_exists_subselect::EXEC_EXISTS_OR_MAT ||
11075          allowed_strategies == Item_exists_subselect::EXEC_MATERIALIZATION);
11076 
11077   const JOIN *parent_join= unit->outer_select()->join;
11078   if (!parent_join || !parent_join->child_subquery_can_materialize)
11079     return false;
11080 
11081   Item_in_subselect * const in_pred=
11082     static_cast<Item_in_subselect *>(unit->item);
11083 
11084   /*
11085     Testing subquery_allows_etc() at each optimization is necessary as each
11086     execution of a prepared statement may use a different type of parameter.
11087   */
11088   if (!subquery_allows_materialization(in_pred, thd, select_lex,
11089                                        select_lex->outer_select()))
11090     return false;
11091 
11092   Opt_trace_context * const trace= &thd->opt_trace;
11093   Opt_trace_object trace_wrapper(trace);
11094   Opt_trace_object
11095     trace_subqmat(trace, "execution_plan_for_potential_materialization");
11096   const double saved_best_read= best_read;
11097   const ha_rows saved_best_rowcount= best_rowcount;
11098   POSITION * const saved_best_pos= best_positions;
11099 
11100   if (in_pred->in2exists_added_to_where())
11101   {
11102     Opt_trace_array trace_subqmat_steps(trace, "steps");
11103 
11104     // Up to one extra slot per semi-join nest is needed (if materialized)
11105     const uint sj_nests= select_lex->sj_nests.elements;
11106 
11107     if (!(best_positions= new (thd->mem_root) POSITION[tables + sj_nests]))
11108       return true;
11109 
11110     // Compute plans which do not use outer references
11111 
11112     assert(allow_outer_refs);
11113     allow_outer_refs= false;
11114 
11115     if (optimize_semijoin_nests_for_materialization(this))
11116       return true;
11117 
11118     if (Optimize_table_order(thd, this, NULL).choose_table_order())
11119       return true;
11120   }
11121   else
11122   {
11123     /*
11124       If IN->EXISTS didn't add any condition to WHERE (only to HAVING, which
11125       can happen if subquery has aggregates) then the plan for materialization
11126       will be the same as for EXISTS - don't compute it again.
11127     */
11128     trace_subqmat.add("surely_same_plan_as_EXISTS", true).
11129       add_alnum("cause", "EXISTS_did_not_change_WHERE");
11130   }
11131 
11132   Semijoin_mat_optimize sjm;
11133   calculate_materialization_costs(this, NULL, primary_tables, &sjm);
11134 
11135   /*
11136     The number of evaluations of the subquery influences costs, we need to
11137     compute it.
11138   */
11139   Opt_trace_object trace_subq_mat_decision(trace, "subq_mat_decision");
11140   Opt_trace_array trace_parents(trace, "parent_fanouts");
11141   const Item_subselect *subs= in_pred;
11142   double subq_executions= 1.0;
11143   for(;;)
11144   {
11145     Opt_trace_object trace_parent(trace);
11146     trace_parent.add_select_number(parent_join->select_lex->select_number);
11147     double parent_fanout;
11148     if (// safety, not sure needed
11149         parent_join->plan_is_const() ||
11150         // if subq is in condition on constant table:
11151         !parent_join->child_subquery_can_materialize)
11152     {
11153       parent_fanout= 1.0;
11154       trace_parent.add("subq_attached_to_const_table", true);
11155     }
11156     else
11157     {
11158       if (subs->in_cond_of_tab != NO_PLAN_IDX)
11159       {
11160         /*
11161           Subquery is attached to a certain 'pos', pos[-1].prefix_rowcount
11162           is the number of times we'll start a loop accessing 'pos'; each such
11163           loop will read pos->rows_fetched rows of 'pos', so subquery will
11164           be evaluated pos[-1].prefix_rowcount * pos->rows_fetched times.
11165           Exceptions:
11166           - if 'pos' is first, use 1.0 instead of pos[-1].prefix_rowcount
11167           - if 'pos' is first of a sj-materialization nest, same.
11168 
11169           If in a sj-materialization nest, pos->rows_fetched and
11170           pos[-1].prefix_rowcount are of the "nest materialization" plan
11171           (copied back in fix_semijoin_strategies()), which is
11172           appropriate as it corresponds to evaluations of our subquery.
11173 
11174           pos->prefix_rowcount is not suitable because if we have:
11175           select ... from ot1 where ot1.col in
11176             (select it1.col1 from it1 where it1.col2 not in (subq));
11177           and subq does subq-mat, and plan is ot1 - it1+firstmatch(ot1),
11178           then:
11179           - t1.prefix_rowcount==1 (due to firstmatch)
11180           - subq is attached to it1, and is evaluated for each row read from
11181             t1, potentially way more than 1.
11182        */
11183         const uint idx= subs->in_cond_of_tab;
11184         assert((int)idx >= 0 && idx < parent_join->tables);
11185         trace_parent.add("subq_attached_to_table", true);
11186         QEP_TAB *const parent_tab= &parent_join->qep_tab[idx];
11187         trace_parent.add_utf8_table(parent_tab->table_ref);
11188         parent_fanout= parent_tab->position()->rows_fetched;
11189         if ((idx > parent_join->const_tables) &&
11190             !sj_is_materialize_strategy(parent_tab->position()->sj_strategy))
11191           parent_fanout*=
11192             parent_tab[-1].position()->prefix_rowcount;
11193       }
11194       else
11195       {
11196         /*
11197           Subquery is SELECT list, GROUP BY, ORDER BY, HAVING: it is evaluated
11198           at the end of the parent join's execution.
11199           It can be evaluated once per row-before-grouping:
11200           SELECT SUM(t1.col IN (subq)) FROM t1 GROUP BY expr;
11201           or once per row-after-grouping:
11202           SELECT SUM(t1.col) AS s FROM t1 GROUP BY expr HAVING s IN (subq),
11203           SELECT SUM(t1.col) IN (subq) FROM t1 GROUP BY expr
11204           It's hard to tell. We simply assume 'once per
11205           row-before-grouping'.
11206 
11207           Another approximation:
11208           SELECT ... HAVING x IN (subq) LIMIT 1
11209           best_rowcount=1 due to LIMIT, though HAVING (and thus the subquery)
11210           may be evaluated many times before HAVING becomes true and the limit
11211           is reached.
11212         */
11213         trace_parent.add("subq_attached_to_join_result", true);
11214         parent_fanout= static_cast<double>(parent_join->best_rowcount);
11215       }
11216     }
11217     subq_executions*= parent_fanout;
11218     trace_parent.add("fanout", parent_fanout);
11219     const bool cacheable= parent_join->select_lex->is_cacheable();
11220     trace_parent.add("cacheable", cacheable);
11221     if (cacheable)
11222     {
11223       // Parent executed only once
11224       break;
11225     }
11226     /*
11227       Parent query is executed once per outer row => go up to find number of
11228       outer rows. Example:
11229       SELECT ... IN(subq-with-in2exists WHERE ... IN (subq-with-mat))
11230     */
11231     if (!(subs= parent_join->unit->item))
11232     {
11233       // derived table, materialized only once
11234       break;
11235     }
11236     parent_join= parent_join->unit->outer_select()->join;
11237     if (!parent_join)
11238     {
11239       /*
11240         May be single-table UPDATE/DELETE, has no join.
11241         @todo  we should find how many rows it plans to UPDATE/DELETE, taking
11242         inspiration in Explain_table::explain_rows_and_filtered().
11243         This is not a priority as it applies only to
11244         UPDATE - child(non-mat-subq) - grandchild(may-be-mat-subq).
11245         And it will autosolve the day UPDATE gets a JOIN.
11246       */
11247       break;
11248     }
11249   }  // for(;;)
11250   trace_parents.end();
11251 
11252   const double cost_exists= subq_executions * saved_best_read;
11253   const double cost_mat_table= sjm.materialization_cost.total_cost();
11254   const double cost_mat= cost_mat_table + subq_executions *
11255     sjm.lookup_cost.total_cost();
11256   const bool mat_chosen=
11257     (allowed_strategies == Item_exists_subselect::EXEC_EXISTS_OR_MAT) ?
11258     (cost_mat < cost_exists) : true;
11259   trace_subq_mat_decision
11260     .add("cost_to_create_and_fill_materialized_table",
11261          cost_mat_table)
11262     .add("cost_of_one_EXISTS", saved_best_read)
11263     .add("number_of_subquery_evaluations", subq_executions)
11264     .add("cost_of_materialization", cost_mat)
11265     .add("cost_of_EXISTS", cost_exists)
11266     .add("chosen", mat_chosen);
11267   if (mat_chosen)
11268     *method= Item_exists_subselect::EXEC_MATERIALIZATION;
11269   else
11270   {
11271     best_read= saved_best_read;
11272     best_rowcount= saved_best_rowcount;
11273     best_positions= saved_best_pos;
11274     /*
11275       Don't restore JOIN::positions or best_ref, they're not used
11276       afterwards. best_positions is (like: by get_sj_strategy()).
11277     */
11278   }
11279   return false;
11280 }
11281 
11282 
11283 /**
11284   Optimize rollup specification.
11285 
11286   Allocate objects needed for rollup processing.
11287 
11288   @returns false if success, true if error.
11289 */
11290 
optimize_rollup()11291 bool JOIN::optimize_rollup()
11292 {
11293   tmp_table_param.quick_group= 0;	// Can't create groups in tmp table
11294   rollup.state= ROLLUP::STATE_INITED;
11295 
11296   /*
11297     Create pointers to the different sum function groups
11298     These are updated by rollup_make_fields()
11299   */
11300   tmp_table_param.group_parts= send_group_parts;
11301   /*
11302     substitute_gc() might substitute an expression in the GROUP BY list with
11303     a generated column. In such case the GC is added to the all_fields as a
11304     hidden field. In total, all_fields list could be grown by up to
11305     send_group_parts columns. Reserve space for them here.
11306   */
11307   const uint ref_array_size= all_fields.elements + send_group_parts;
11308 
11309   Item_null_result **null_items=
11310     static_cast<Item_null_result**>(thd->alloc(sizeof(Item*)*send_group_parts));
11311 
11312   rollup.null_items= Item_null_array(null_items, send_group_parts);
11313   rollup.ref_pointer_arrays=
11314     static_cast<Ref_ptr_array*>
11315     (thd->alloc((sizeof(Ref_ptr_array) +
11316                  ref_array_size * sizeof(Item*)) * send_group_parts));
11317   rollup.fields=
11318     static_cast<List<Item>*>(thd->alloc(sizeof(List<Item>) * send_group_parts));
11319 
11320   if (!null_items || !rollup.ref_pointer_arrays || !rollup.fields)
11321     return true;
11322 
11323   Item **ref_array= (Item**) (rollup.ref_pointer_arrays+send_group_parts);
11324 
11325   /*
11326     Prepare space for field list for the different levels
11327     These will be filled up in rollup_make_fields()
11328   */
11329   ORDER *group= group_list;
11330   for (uint i= 0; i < send_group_parts; i++, group= group->next)
11331   {
11332     rollup.null_items[i]=
11333       new (thd->mem_root) Item_null_result((*group->item)->field_type(),
11334                                            (*group->item)->result_type());
11335     if (rollup.null_items[i] == NULL)
11336       return true;           /* purecov: inspected */
11337     List<Item> *rollup_fields= &rollup.fields[i];
11338     rollup_fields->empty();
11339     rollup.ref_pointer_arrays[i]= Ref_ptr_array(ref_array, ref_array_size);
11340     ref_array+= ref_array_size;
11341   }
11342   for (uint i= 0; i < send_group_parts; i++)
11343   {
11344     for (uint j= 0; j < fields_list.elements; j++)
11345       rollup.fields[i].push_back(rollup.null_items[i]);
11346   }
11347   return false;
11348 }
11349 
11350 
11351 /**
11352   Refine the best_rowcount estimation based on what happens after tables
11353   have been joined: LIMIT and type of result sink.
11354  */
refine_best_rowcount()11355 void JOIN::refine_best_rowcount()
11356 {
11357   // If plan is const, 0 or 1 rows should be returned
11358   assert(!plan_is_const() || best_rowcount <= 1);
11359 
11360   if (plan_is_const())
11361     return;
11362 
11363   /*
11364     If a derived table, or a member of a UNION which itself forms a derived
11365     table:
11366     setting estimate to 0 or 1 row would mark the derived table as const.
11367     The row count is bumped to the nearest higher value, so that the
11368     query block will not be evaluated during optimization.
11369   */
11370   if (best_rowcount <= 1 &&
11371       select_lex->master_unit()->first_select()->linkage ==
11372       DERIVED_TABLE_TYPE)
11373     best_rowcount= 2;
11374 
11375   /*
11376     There will be no more rows than defined in the LIMIT clause. Use it
11377     as an estimate. If LIMIT 1 is specified, the query block will be
11378     considered "const", with actual row count 0 or 1.
11379   */
11380   set_if_smaller(best_rowcount, unit->select_limit_cnt);
11381 }
11382 
11383 /**
11384   @} (end of group Query_Optimizer)
11385 */
11386 
11387 /**
11388   This function is used to get the key length of Item object on
11389   which one tmp field will be created during create_tmp_table.
11390   This function references KEY_PART_INFO::init_from_field().
11391 
11392   @param item  A inner item of outer join
11393 
11394   @return  The length of a item to be as a key of a temp table
11395 */
11396 
get_key_length_tmp_table(Item * item)11397 static uint32 get_key_length_tmp_table(Item *item)
11398 {
11399   uint32 len= 0;
11400 
11401   item= item->real_item();
11402   if (item->type() == Item::FIELD_ITEM)
11403     len= ((Item_field *)item)->field->key_length();
11404   else
11405     len= item->max_length;
11406 
11407   if (item->maybe_null)
11408     len+= HA_KEY_NULL_LENGTH;
11409 
11410   // references KEY_PART_INFO::init_from_field()
11411   enum_field_types type= item->field_type();
11412   if (type == MYSQL_TYPE_BLOB ||
11413       type == MYSQL_TYPE_VARCHAR ||
11414       type == MYSQL_TYPE_GEOMETRY)
11415     len+= HA_KEY_BLOB_LENGTH;
11416 
11417   return len;
11418 }
11419 
11420