1 /* Copyright (c) 2000, 2021, Oracle and/or its affiliates.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
22 
23 /**
24   @file
25 
26   @brief Optimize query expressions: Make optimal table join order, select
27          optimal access methods per table, apply grouping, sorting and
28          limit processing.
29 
30   @defgroup Query_Optimizer  Query Optimizer
31   @{
32 */
33 
34 #include "sql_optimizer.h"
35 
36 #include "my_bit.h"              // my_count_bits
37 #include "abstract_query_plan.h" // Join_plan
38 #include "debug_sync.h"          // DEBUG_SYNC
39 #include "item_sum.h"            // Item_sum
40 #include "lock.h"                // mysql_unlock_some_tables
41 #include "opt_explain.h"         // join_type_str
42 #include "opt_trace.h"           // Opt_trace_object
43 #include "sql_base.h"            // init_ftfuncs
44 #include "sql_join_buffer.h"     // JOIN_CACHE
45 #include "sql_parse.h"           // check_stack_overrun
46 #include "sql_planner.h"         // calculate_condition_filter
47 #include "sql_resolver.h"        // subquery_allows_materialization
48 #include "sql_test.h"            // print_where
49 #include "sql_tmp_table.h"       // get_max_key_and_part_length
50 #include "opt_hints.h"           // hint_table_state
51 
52 #include <algorithm>
53 using std::max;
54 using std::min;
55 
    // Forward declarations of file-local (static) helper functions.
56 static bool optimize_semijoin_nests_for_materialization(JOIN *join);
57 static void calculate_materialization_costs(JOIN *join, TABLE_LIST *sj_nest,
58                                             uint n_tables,
59                                             Semijoin_mat_optimize *sjm);
60 static bool make_join_select(JOIN *join, Item *item);
61 static bool list_contains_unique_index(JOIN_TAB *tab,
62                           bool (*find_func) (Field *, void *), void *data);
63 static bool find_field_in_item_list (Field *field, void *data);
64 static bool find_field_in_order_list (Field *field, void *data);
65 static ORDER *create_distinct_group(THD *thd, Ref_ptr_array ref_pointer_array,
66                                     ORDER *order, List<Item> &fields,
67                                     List<Item> &all_fields,
68 				    bool *all_order_by_fields_used);
69 static TABLE *get_sort_by_table(ORDER *a,ORDER *b,TABLE_LIST *tables);
70 static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab);
71 static Item *remove_additional_cond(Item* conds);
72 static void trace_table_dependencies(Opt_trace_context * trace,
73                                      JOIN_TAB *join_tabs,
74                                      uint table_count);
75 static bool
76 update_ref_and_keys(THD *thd, Key_use_array *keyuse,JOIN_TAB *join_tab,
77                     uint tables, Item *cond, COND_EQUAL *cond_equal,
78                     table_map normal_tables, SELECT_LEX *select_lex,
79                     SARGABLE_PARAM **sargables);
80 static bool pull_out_semijoin_tables(JOIN *join);
81 static void add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab);
82 static ha_rows get_quick_record_count(THD *thd, JOIN_TAB *tab, ha_rows limit);
83 static Item *
84 make_cond_for_table_from_pred(Item *root_cond, Item *cond,
85                               table_map tables, table_map used_table,
86                               bool exclude_expensive_cond);
87 static bool
88 only_eq_ref_tables(JOIN *join, ORDER *order, table_map tables,
89                    table_map *cached_eq_ref_tables, table_map
90                    *eq_ref_tables);
91 static bool setup_join_buffering(JOIN_TAB *tab, JOIN *join, uint no_jbuf_after);
92 
93 static bool
94 test_if_skip_sort_order(JOIN_TAB *tab, ORDER *order, ha_rows select_limit,
95                         const bool no_changes, const key_map *map,
96                         const char *clause_type);
97 
98 static Item_func_match *test_if_ft_index_order(ORDER *order);
99 
100 
101 static uint32 get_key_length_tmp_table(Item *item);
102 
103 /**
104   Optimizes one query block into a query execution plan (QEP.)
105 
106   This is the entry point to the query optimization phase. This phase
107   applies both logical (equivalent) query rewrites, cost-based join
108   optimization, and rule-based access path selection. Once an optimal
109   plan is found, the member function creates/initializes all
110   structures needed for query execution. The main optimization phases
111   are outlined below:
112 
113     -# Logical transformations:
114       - Outer to inner joins transformation.
115       - Equality/constant propagation.
116       - Partition pruning.
117       - COUNT(*), MIN(), MAX() constant substitution in case of
118         implicit grouping.
119       - ORDER BY optimization.
120     -# Perform cost-based optimization of table order and access path
121        selection. See JOIN::make_join_plan()
122     -# Post-join order optimization:
123        - Create optimal table conditions from the where clause and the
124          join conditions.
125        - Inject outer-join guarding conditions.
126        - Adjust data access methods after determining table condition
127          (several times.)
128        - Optimize ORDER BY/DISTINCT.
129     -# Code generation
130        - Set data access functions.
131        - Try to optimize away sorting/distinct.
132        - Setup temporary table usage for grouping and/or sorting.
133 
134   @retval 0 Success.
135   @retval 1 Error, error code saved in member JOIN::error.
136 */
137 int
optimize()138 JOIN::optimize()
139 {
      // Forwarded to setup_join_buffering() and make_join_readinfo(); it is
      // set to 0 further down in the cases where join buffering must not be
      // used.
140   uint no_jbuf_after= UINT_MAX;
141 
142   DBUG_ENTER("JOIN::optimize");
      // Tables must be locked, unless the query block has no leaf tables or
      // is the unit's fake_select_lex.
143   assert(select_lex->leaf_table_count == 0 ||
144          thd->lex->is_query_tables_locked() ||
145          select_lex == unit->fake_select_lex);
      // No plan state may have been set up yet: table counters are zero and
      // tables_list still holds the value (TABLE_LIST*)1.
146   assert(tables == 0 &&
147          primary_tables == 0 &&
148          tables_list == (TABLE_LIST*)1);
149 
150   // Already optimized: return early to prevent double initialization on EXPLAIN
151   if (optimized)
152     DBUG_RETURN(0);
153 
154   Prepare_error_tracker tracker(thd);
155 
156   DEBUG_SYNC(thd, "before_join_optimize");
157 
158   THD_STAGE_INFO(thd, stage_optimizing);
159 
160   if (select_lex->first_execution)
161   {
162     /**
163       @todo
164       This query block didn't transform itself in SELECT_LEX::prepare(), so
165       belongs to a parent query block. That parent, or its parents, had to
166       transform us - it has not; maybe it is itself in prepare() and
167       evaluating the present query block as an Item_subselect. Such evaluation
168       in prepare() is expected to be a rare case to be eliminated in the
169       future ("SET x=(subq)" is one such case; because it locks tables before
170       prepare()).
171     */
172     if (select_lex->apply_local_transforms(thd, false))
173       DBUG_RETURN(error= 1);
174   }
175 
      // Optimizer trace scaffolding: the work below is recorded under the
      // "join_optimization" object, inside its "steps" array.
176   Opt_trace_context * const trace= &thd->opt_trace;
177   Opt_trace_object trace_wrapper(trace);
178   Opt_trace_object trace_optimize(trace, "join_optimization");
179   trace_optimize.add_select_number(select_lex->select_number);
180   Opt_trace_array trace_steps(trace, "steps");
181 
182   count_field_types(select_lex, &tmp_table_param, all_fields, false, false);
183 
      // Aggregate functions require explicit or implicit grouping.
184   assert(tmp_table_param.sum_func_count == 0 ||
185          group_list || implicit_grouping);
186 
187   if (select_lex->olap == ROLLUP_TYPE && optimize_rollup())
188     DBUG_RETURN(true); /* purecov: inspected */
189 
190   if (alloc_func_list())
191     DBUG_RETURN(1);    /* purecov: inspected */
192 
193   if (select_lex->get_optimizable_conditions(thd, &where_cond, &having_cond))
194     DBUG_RETURN(1);
195 
      // NOTE(review): presumably sets the 'optimized' flag tested at the top
      // of this function - confirm.
196   set_optimized();
197 
198   tables_list= select_lex->get_table_list();
199 
200   /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
201   /*
202     Run optimize phase for all derived tables/views used in this SELECT,
203     including those in semi-joins.
204   */
205   if (select_lex->materialized_derived_table_count)
206   {
207     for (TABLE_LIST *tl= select_lex->leaf_tables; tl; tl= tl->next_leaf)
208     {
209       if (tl->is_view_or_derived() && tl->optimize_derived(thd))
210         DBUG_RETURN(1);
211     }
212   }
213 
214   /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
215 
      // The unit's LIMIT is used as row_limit only when there is no DISTINCT,
      // ORDER BY or GROUP BY; otherwise row_limit is HA_POS_ERROR.
216   row_limit= ((select_distinct || order || group_list) ?
217              HA_POS_ERROR : unit->select_limit_cnt);
218   // m_select_limit is used to decide if we are likely to scan the whole table.
219   m_select_limit= unit->select_limit_cnt;
220 
221   if (unit->first_select()->active_options() & OPTION_FOUND_ROWS)
222   {
223     /*
224       Calculate found rows if
225       - LIMIT is set, and
226       - Query block is not equipped with "braces". In this case, each
227         query block must be calculated fully and the limit is applied on
228         the final UNION evaluation.
229     */
230     calc_found_rows= m_select_limit != HA_POS_ERROR && !select_lex->braces;
231   }
232   if (having_cond || calc_found_rows)
233     m_select_limit= HA_POS_ERROR;
234 
      // LIMIT 0 and no found-rows calculation: take the empty-result shortcut.
235   if (unit->select_limit_cnt == 0 && !calc_found_rows)
236   {
237     zero_result_cause= "Zero limit";
238     best_rowcount= 0;
239     goto setup_subq_exit;
240   }
241 
      // Optimize the WHERE condition (also done when there are outer joins,
      // passing the top-level join list). A condition that evaluates to
      // COND_FALSE short-circuits to an empty result.
242   if (where_cond || select_lex->outer_join)
243   {
244     if (optimize_cond(thd, &where_cond, &cond_equal,
245                       &select_lex->top_join_list, &select_lex->cond_value))
246     {
247       error= 1;
248       DBUG_PRINT("error",("Error from optimize_cond"));
249       DBUG_RETURN(1);
250     }
251     if (select_lex->cond_value == Item::COND_FALSE)
252     {
253       zero_result_cause= "Impossible WHERE";
254       best_rowcount= 0;
255       goto setup_subq_exit;
256     }
257   }
      // Same treatment for the HAVING condition (no join list is passed).
258   if (having_cond)
259   {
260     if (optimize_cond(thd, &having_cond, &cond_equal, NULL,
261                       &select_lex->having_value))
262     {
263       error= 1;
264       DBUG_PRINT("error",("Error from optimize_cond"));
265       DBUG_RETURN(1);
266     }
267     if (select_lex->having_value == Item::COND_FALSE)
268     {
269       zero_result_cause= "Impossible HAVING";
270       best_rowcount= 0;
271       goto setup_subq_exit;
272     }
273   }
274 
275   if (thd->lex->sql_command == SQLCOM_INSERT_SELECT ||
276       thd->lex->sql_command == SQLCOM_REPLACE_SELECT)
277   {
278     /*
279       Statement-based replication of INSERT ... SELECT ... LIMIT and
280       REPLACE ... SELECT is safe as long as the order of rows is defined by
281       either ORDER BY or another condition. However, it is too late for this
282       to have an impact on our decision to switch to row-based. We can only
283       suppress the warning here.
284     */
285     if (select_lex->select_limit &&
286         select_lex->select_limit->fixed &&
287         select_lex->select_limit->val_int() &&
288         !is_order_deterministic(&select_lex->top_join_list, where_cond, order))
289     {
290       thd->order_deterministic= false;
291     }
292   }
293 
294   if (select_lex->partitioned_table_count && prune_table_partitions())
295   {
296     error= 1;
297     DBUG_PRINT("error", ("Error from prune_partitions"));
298     DBUG_RETURN(1);
299   }
300 
301   /*
302      Try to optimize count(*), min() and max() to const fields if
303      there is implicit grouping (aggregate functions but no
304      group_list). In this case, the result set shall only contain one
305      row.
306   */
307   if (tables_list && implicit_grouping)
308   {
309     int res;
310     /*
311       opt_sum_query() returns HA_ERR_KEY_NOT_FOUND if no rows match
312       the WHERE condition,
313       or 1 if all items were resolved (optimized away),
314       or 0, or an error number HA_ERR_...
315 
316       If all items were resolved by opt_sum_query, there is no need to
317       open any tables.
318     */
319     if ((res= opt_sum_query(thd, select_lex->leaf_tables, all_fields,
320                             where_cond)))
321     {
322       best_rowcount= 0;
323       if (res == HA_ERR_KEY_NOT_FOUND)
324       {
325         DBUG_PRINT("info",("No matching min/max row"));
326 	zero_result_cause= "No matching min/max row";
327         goto setup_subq_exit;
328       }
          // Any other value above 1 is treated as an error number.
329       if (res > 1)
330       {
331         error= res;
332         DBUG_PRINT("error",("Error from opt_sum_query"));
333         DBUG_RETURN(1);
334       }
          // Negative values are handled like "no matching row".
335       if (res < 0)
336       {
337         DBUG_PRINT("info",("No matching min/max row"));
338         zero_result_cause= "No matching min/max row";
339         goto setup_subq_exit;
340       }
          // Remaining case: res == 1, all items were resolved.
341       DBUG_PRINT("info",("Select tables optimized away"));
342       zero_result_cause= "Select tables optimized away";
343       tables_list= 0;				// All tables resolved
344       best_rowcount= 1;
345       const_tables= tables= primary_tables= select_lex->leaf_table_count;
346       /*
347         Extract all table-independent conditions and replace the WHERE
348         clause with them. All other conditions were computed by opt_sum_query
349         and the MIN/MAX/COUNT function(s) have been replaced by constants,
350         so there is no need to compute the whole WHERE clause again.
351         Notice that make_cond_for_table() will always succeed in removing all
352         computed conditions, because opt_sum_query() is applicable only to
353         conjunctions.
354         Preserve conditions for EXPLAIN.
355       */
356       if (where_cond && !thd->lex->describe)
357       {
358         Item *table_independent_conds=
359           make_cond_for_table(where_cond, PSEUDO_TABLE_BITS, 0, 0);
360         DBUG_EXECUTE("where",
361                      print_where(table_independent_conds,
362                                  "where after opt_sum_query()",
363                                  QT_ORDINARY););
364         where_cond= table_independent_conds;
365       }
366       goto setup_subq_exit;
367     }
368   }
      // Query block without tables: only temporary-table setup is done before
      // the plan is marked NO_TABLES.
369   if (!tables_list)
370   {
371     DBUG_PRINT("info",("No tables"));
372     best_rowcount= 1;
373     error= 0;
374     if (make_tmp_tables_info())
375       DBUG_RETURN(1);
376     count_field_types(select_lex, &tmp_table_param, all_fields, false, false);
377     // Make plan visible for EXPLAIN
378     set_plan_state(NO_TABLES);
379     DBUG_RETURN(0);
380   }
381   error= -1;					// Error is sent to client
382   sort_by_table= get_sort_by_table(order, group_list, select_lex->leaf_tables);
383 
384   if ((where_cond || group_list || order) &&
385       substitute_gc(thd, select_lex, where_cond, group_list, order))
386   {
387     // We added hidden fields to the all_fields list, count them.
388     count_field_types(select_lex, &tmp_table_param, select_lex->all_fields,
389                       false, false);
390   }
391 
392   // Set up join order and initial access paths
393   THD_STAGE_INFO(thd, stage_statistics);
394   if (make_join_plan())
395   {
396     if (thd->killed)
397       thd->send_kill_message();
398     DBUG_PRINT("error",("Error: JOIN::make_join_plan() failed"));
399     DBUG_RETURN(1);
400   }
401 
402   // At this stage, join_tab==NULL, JOIN_TABs are listed in order by best_ref.
403   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
404 
      // NOTE(review): every earlier assignment of zero_result_cause jumps
      // straight to the exit label, so a value seen here was presumably set
      // during make_join_plan() - confirm.
405   if (zero_result_cause)
406     goto setup_subq_exit;
407 
408   if (rollup.state != ROLLUP::STATE_NONE)
409   {
410     if (rollup_process_const_fields())
411     {
412       DBUG_PRINT("error", ("Error: rollup_process_fields() failed"));
413       DBUG_RETURN(1);
414     }
415     /*
416       Fields may have been replaced by Item_func_rollup_const, so
417       recalculate the number of fields and functions for this query block.
418     */
419 
420     // JOIN::optimize_rollup() may set quick_group=0, and we must not undo that.
421     const uint save_quick_group= tmp_table_param.quick_group;
422 
423     count_field_types(select_lex, &tmp_table_param, all_fields, false, false);
424     tmp_table_param.quick_group= save_quick_group;
425   }
426   else
427   {
428     /* Remove distinct if only const tables */
429     select_distinct&= !plan_is_const();
430   }
431 
      // Unlock the const tables early, unless running with locked_tables_mode
      // set or the SELECT_NO_UNLOCK option is active.
432   if (const_tables && !thd->locked_tables_mode &&
433       !(select_lex->active_options() & SELECT_NO_UNLOCK))
434   {
435     TABLE *ct[MAX_TABLES];
436     for (uint i= 0; i < const_tables; i++)
437       ct[i]= best_ref[i]->table();
438     mysql_unlock_some_tables(thd, ct, const_tables);
439   }
440   if (!where_cond && select_lex->outer_join)
441   {
442     /* Handle the case where we have an OUTER JOIN without a WHERE */
443     where_cond=new Item_int((longlong) 1,1);	// Always true
444   }
445 
446   error= 0;
447   /*
448     Among the equal fields belonging to the same multiple equality
449     choose the one that is to be retrieved first and substitute
450     all references to these in where condition for a reference for
451     the selected field.
452   */
453   if (where_cond)
454   {
455     where_cond= substitute_for_best_equal_field(where_cond, cond_equal,
456                                                 map2table);
457     if (thd->is_error())
458     {
459       error= 1;
460       DBUG_PRINT("error",("Error from substitute_for_best_equal"));
461       DBUG_RETURN(1);
462     }
463     where_cond->update_used_tables();
464     DBUG_EXECUTE("where",
465                  print_where(where_cond,
466                              "after substitute_best_equal",
467                              QT_ORDINARY););
468   }
469 
470   /*
471     Perform the same optimization on field evaluation for all join conditions.
472   */
473   for (uint i= const_tables; i < tables ; ++i)
474   {
475     JOIN_TAB *const tab= best_ref[i];
476     if (tab->position() && tab->join_cond())
477     {
478       tab->set_join_cond(substitute_for_best_equal_field(tab->join_cond(),
479                                                          tab->cond_equal,
480                                                          map2table));
481       if (thd->is_error())
482       {
483         error= 1;
484         DBUG_PRINT("error",("Error from substitute_for_best_equal"));
485         DBUG_RETURN(1);
486       }
487       tab->join_cond()->update_used_tables();
488     }
489   }
490 
491   if (init_ref_access())
492   {
493     error= 1;
494     DBUG_PRINT("error",("Error from init_ref_access"));
495     DBUG_RETURN(1);
496   }
497 
498   // Update table dependencies after assigning ref access fields
499   update_depend_map();
500 
501   THD_STAGE_INFO(thd, stage_preparing);
502 
      // A true return from make_join_select() without a pending error means
      // the WHERE condition turned out to be impossible.
503   if (make_join_select(this, where_cond))
504   {
505     if (thd->is_error())
506       DBUG_RETURN(1);
507 
508     zero_result_cause=
509       "Impossible WHERE noticed after reading const tables";
510     goto setup_subq_exit;
511   }
512 
513   if (select_lex->query_result()->initialize_tables(this))
514   {
515     DBUG_PRINT("error",("Error: initialize_tables() failed"));
516     DBUG_RETURN(1);				// NOTE(review): error was set to 0 above, not -1 - confirm intent
517   }
518 
519   error= -1;					/* in effect for the failure returns below until reset to 0 on success */
520 
521   if (optimize_distinct_group_order())
522     DBUG_RETURN(true);
523 
      // No join buffering when SELECT_NO_JOIN_CACHE is active or the query
      // block has full-text functions.
524   if ((select_lex->active_options() & SELECT_NO_JOIN_CACHE) ||
525       select_lex->ftfunc_list->elements)
526     no_jbuf_after= 0;
527 
528   /* Perform FULLTEXT search before all regular searches */
529   if (select_lex->has_ft_funcs() && optimize_fts_query())
530     DBUG_RETURN(1);
531 
532   /*
533     By setting child_subquery_can_materialize so late we gain the following:
534     JOIN::compare_costs_of_subquery_strategies() can test this variable to
535     know if we have finished evaluating constant conditions, which itself
536     helps determining fanouts.
537   */
538   child_subquery_can_materialize= true;
539 
540   /*
541     It's necessary to check const part of HAVING cond as
542     there is a chance that some cond parts may become
543     const items after make_join_statistics() (for example
544     when Item is a reference to const table field from
545     outer join).
546     This check is performed only for those conditions
547     which do not use aggregate functions. In such case
548     temporary table may not be used and const condition
549     elements may be lost during further having
550     condition transformation in JOIN::exec.
551   */
552   if (having_cond && const_table_map && !having_cond->with_sum_func)
553   {
554     having_cond->update_used_tables();
555     if (remove_eq_conds(thd, having_cond, &having_cond,
556                         &select_lex->having_value))
557     {
558       error= 1;
559       DBUG_PRINT("error",("Error from remove_eq_conds"));
560       DBUG_RETURN(1);
561     }
562     if (select_lex->having_value == Item::COND_FALSE)
563     {
564       having_cond= new Item_int((longlong) 0,1);
565       zero_result_cause= "Impossible HAVING noticed after reading const tables";
566       goto setup_subq_exit;
567     }
568   }
569 
570   /* Cache constant expressions in WHERE, HAVING, ON clauses. */
571   if (!plan_is_const() && cache_const_exprs())
572     DBUG_RETURN(1);
573 
574   // See if this subquery can be evaluated with subselect_indexsubquery_engine
575   if (const int ret= replace_index_subquery())
576   {
577     set_plan_state(PLAN_READY);
578     /*
579       We leave optimize() because the rest of it is only about order/group
580       which those subqueries don't have and about setting up plan which
581       we're not going to use due to different execution method.
582     */
        // A negative 'ret' is reported as failure (1), a positive one as
        // success (0).
583     DBUG_RETURN(ret < 0);
584   }
585 
586   {
587     /*
588       If the hint FORCE INDEX FOR ORDER BY/GROUP BY is used for the first
589       table (it does not make sense for other tables) then we cannot do join
590       buffering.
591     */
592     if (!plan_is_const())
593     {
594       const TABLE * const first= best_ref[const_tables]->table();
595       if ((first->force_index_order && order) ||
596           (first->force_index_group && group_list))
597         no_jbuf_after= 0;
598     }
599 
600     bool simple_sort= true;
601     // Check whether join cache could be used
602     for (uint i= const_tables; i < tables; i++)
603     {
604       JOIN_TAB *const tab= best_ref[i];
605       if (!tab->position())
606         continue;
607       if (setup_join_buffering(tab, this, no_jbuf_after))
608         DBUG_RETURN(true);
609       if (tab->use_join_cache() != JOIN_CACHE::ALG_NONE)
610         simple_sort= false;
          // A full-text access (JT_FT) must never be combined with a join cache.
611       assert(tab->type() != JT_FT ||
612              tab->use_join_cache() == JOIN_CACHE::ALG_NONE);
613     }
614     if (!simple_sort)
615     {
616       /*
617         A join buffer is used for this table. We here inform the optimizer
618         that it should not rely on rows of the first non-const table being in
619         order thanks to an index scan; indeed join buffering of the present
620         table subsequently changes the order of rows.
621       */
622       simple_order= simple_group= false;
623     }
624   }
625 
626   if (!plan_is_const() && order)
627   {
628     /*
629       Force using of tmp table if sorting by a SP or UDF function due to
630       their expensive and probably non-deterministic nature.
631     */
632     for (ORDER *tmp_order= order; tmp_order ; tmp_order=tmp_order->next)
633     {
634       Item *item= *tmp_order->item;
635       if (item->is_expensive())
636       {
637         /* Force tmp table without sort */
638         simple_order= simple_group= false;
639         break;
640       }
641     }
642   }
643 
644   /*
645     Check if we need to create a temporary table.
646     This has to be done if all tables are not already read (const tables)
647     and one of the following conditions holds:
648     - We are using DISTINCT (simple DISTINCTs have already been optimized away)
649     - We are using an ORDER BY or GROUP BY on fields not in the first table
650     - We are using different ORDER BY and GROUP BY orders
651     - The user wants us to buffer the result.
652     When the WITH ROLLUP modifier is present, we cannot skip temporary table
653     creation for the DISTINCT clause just because there are only const tables.
654   */
655   need_tmp= ((!plan_is_const() &&
656 	     ((select_distinct || (order && !simple_order) ||
657                (group_list && !simple_group)) ||
658 	      (group_list && order) ||
659               (select_lex->active_options() & OPTION_BUFFER_RESULT))) ||
660              (rollup.state != ROLLUP::STATE_NONE && select_distinct));
661 
662   DBUG_EXECUTE("info", TEST_join(this););
663 
664   if (!plan_is_const())
665   {
        // First non-const table in join order.
666     JOIN_TAB *tab= best_ref[const_tables];
667     /*
668       Because filesort always does a full table scan or a quick range scan
669       we must add the removed reference to the select for the table.
670       We only need to do this when we have a simple_order or simple_group
671       as in other cases the join is done before the sort.
672     */
673     if ((order || group_list) &&
674         tab->type() != JT_ALL &&
675         tab->type() != JT_FT &&
676         tab->type() != JT_REF_OR_NULL &&
677         ((order && simple_order) || (group_list && simple_group)))
678     {
679       if (add_ref_to_table_cond(thd,tab)) {
680         DBUG_RETURN(1);
681       }
682     }
683     // Test if we can use an index instead of sorting
684     test_skip_sort();
685   }
686 
687   if (alloc_qep(tables))
688     DBUG_RETURN(error= 1);                      /* purecov: inspected */
689 
690   if (make_join_readinfo(this, no_jbuf_after))
691     DBUG_RETURN(1);                             /* purecov: inspected */
692 
693   if (make_tmp_tables_info())
694     DBUG_RETURN(1);
695 
696   // At this stage, we have fully set QEP_TABs; JOIN_TABs are inaccessible,
697   // pushed joins (see below) are still allowed to change the QEP_TABs
698 
699   /*
700     Push joins to handlerton(s)
701 
702     The handlerton(s) will inspect the QEP through the
703     AQP (Abstract Query Plan) and extract from it whatever
704     it might implement of pushed execution.
705 
706     It is the responsibility of the handler:
707      - to store any information it needs for later
708        execution of pushed queries.
709      - to call appropriate AQP functions which modify the
710        QEP to use the special 'linked' read functions
711        for those parts of the join which have been pushed.
712 
713     Currently pushed joins are only implemented by NDB.
714 
715     It only makes sense to try pushing if > 1 non-const tables.
716   */
717   if (!plan_is_single_table() && !plan_is_const())
718   {
719     const AQP::Join_plan plan(this);
720     if (ha_make_pushed_joins(thd, &plan))
721       DBUG_RETURN(1);
722   }
723 
724   // Update m_current_query_cost to reflect actual need of filesort.
      // A sort cost was accounted for but ESP_USING_FILESORT is not flagged in
      // explain_flags, so the cost is removed from best_read again.
725   if (sort_cost > 0.0 && !explain_flags.any(ESP_USING_FILESORT))
726   {
727     best_read-= sort_cost;
728     sort_cost= 0.0;
729     if (thd->lex->is_single_level_stmt())
730       thd->m_current_query_cost= best_read;
731   }
732 
733   count_field_types(select_lex, &tmp_table_param, all_fields, false, false);
734   // Make plan visible for EXPLAIN
735   set_plan_state(PLAN_READY);
736 
737   DEBUG_SYNC(thd, "after_join_optimize");
738 
739   error= 0;
740   DBUG_RETURN(0);
741 
    // Common exit for every path above that set zero_result_cause and jumped
    // here; the plan is marked ZERO_RESULT and success (0) is returned.
742 setup_subq_exit:
743 
744   assert(zero_result_cause != NULL);
745   /*
746     Even with zero matching rows, subqueries in the HAVING clause may
747     need to be evaluated if there are aggregate functions in the
748     query. If this JOIN is part of an outer query, subqueries in HAVING may
749     be evaluated several times in total; so subquery materialization makes
750     sense.
751   */
752   child_subquery_can_materialize= true;
753   trace_steps.end();   // because all steps are done
      // Record the reason for the shortcut in the optimizer trace.
754   Opt_trace_object(trace, "empty_result")
755     .add_alnum("cause", zero_result_cause);
756 
757   having_for_explain= having_cond;
758   error= 0;
759 
760   if (!qep_tab && best_ref)
761   {
762     /*
763       After creation of JOIN_TABs in make_join_plan(), we have shortcut due to
764       some zero_result_cause. For simplification, if we have JOIN_TABs we
765       want QEP_TABs too.
766     */
767     if (alloc_qep(tables))
768       DBUG_RETURN(1);                           /* purecov: inspected */
769     unplug_join_tabs();
770   }
771 
772   set_plan_state(ZERO_RESULT);
773   DBUG_RETURN(0);
774 }
775 
776 
777 /**
778   Substitute all expressions in the WHERE condition and ORDER/GROUP lists
779   that match generated columns (GC) expressions with GC fields, if any.
780 
781   @details This function does 3 things:
782   1) Creates a list of all GC fields that are part of a key and whose GC
783     expression is a function. All query tables are scanned. If there are no
784     such fields, the function exits.
785   2) By means of Item::compile() WHERE clause is transformed.
786     @see Item_func::gc_subst_transformer() for details.
787   3) If there are ORDER/GROUP BY clauses, this function tries to substitute
788     expressions in these lists with GCs too. It removes from the list of
789     indexed GCs all elements whose indexes are blocked by hints. This is done
790     to reduce the amount of further work. Next it goes through the ORDER/GROUP BY list
791     and matches the expression in it against GC expressions in indexed GC
792     list. When a match is found, the expression is replaced with a new
793     Item_field for the matched GC field. Also, this new field is added to
794     the hidden part of all_fields list.
795 
796   @param thd         thread handle
797   @param select_lex  the current select
798   @param where_cond  the WHERE condition, possibly NULL
799   @param group_list  the GROUP BY clause, possibly NULL
800   @param order       the ORDER BY clause, possibly NULL
801 
802   @return true if the GROUP BY clause or the ORDER BY clause was
803           changed, false otherwise
804 */
805 
substitute_gc(THD * thd,SELECT_LEX * select_lex,Item * where_cond,ORDER * group_list,ORDER * order)806 bool substitute_gc(THD *thd, SELECT_LEX *select_lex, Item *where_cond,
807                    ORDER *group_list, ORDER *order)
808 {
809   List<Field> indexed_gc;
810   Opt_trace_context * const trace= &thd->opt_trace;
811   Opt_trace_object trace_wrapper(trace);
812   Opt_trace_object subst_gc(trace, "substitute_generated_columns");
813 
814   // Collect all GCs that are a part of a key
815   for (TABLE_LIST *tl= select_lex->leaf_tables;
816        tl;
817        tl= tl->next_leaf)
818   {
819     if (tl->table->s->keys == 0)
820       continue;
821     for (uint i= 0; i < tl->table->s->fields; i++)
822     {
823       Field *fld= tl->table->field[i];
824       if (fld->is_gcol() && !fld->part_of_key.is_clear_all() &&
825           fld->gcol_info->expr_item->can_be_substituted_for_gc())
826       {
827         // Don't check allowed keys here as conditions/group/order use
828         // different keymaps for that.
829         indexed_gc.push_back(fld);
830       }
831     }
832   }
833   // No GC in the tables used in the query
834   if (indexed_gc.elements == 0)
835     return false;
836 
837   if (where_cond)
838   {
839     // Item_func::compile will dereference this pointer, provide valid value.
840     uchar i, *dummy= &i;
841     where_cond->compile(&Item::gc_subst_analyzer, &dummy,
842                         &Item::gc_subst_transformer, (uchar*) &indexed_gc);
843     subst_gc.add("resulting_condition", where_cond);
844   }
845 
846   if (!(group_list || order))
847     return false;
848   // Filter out GCs that do not have index usable for GROUP/ORDER
849   Field *gc;
850   List_iterator<Field> li(indexed_gc);
851 
852   while ((gc= li++))
853   {
854     key_map tkm= gc->part_of_key;
855     tkm.intersect(group_list ? gc->table->keys_in_use_for_group_by :
856                   gc->table->keys_in_use_for_order_by);
857     if (tkm.is_clear_all())
858       li.remove();
859   }
860   if (!indexed_gc.elements)
861     return false;
862 
863   // Index could be used for ORDER only if there is no GROUP
864   ORDER *list= group_list ? group_list : order;
865   bool changed= false;
866   for (ORDER *ord= list; ord; ord= ord->next)
867   {
868     li.rewind();
869     if (!(*ord->item)->can_be_substituted_for_gc())
870       continue;
871     while ((gc= li++))
872     {
873       Item_func *tmp= pointer_cast<Item_func*>(*ord->item);
874       Item_field *field;
875       if ((field= get_gc_for_expr(&tmp, gc, gc->result_type())))
876       {
877 
878         changed= true;
879         /* Add new field to field list. */
880         ord->item= select_lex->add_hidden_item(field);
881         break;
882       }
883     }
884   }
885   if (changed && trace->is_started())
886   {
887     String str;
888     st_select_lex::print_order(&str, list,
889                                enum_query_type(QT_TO_SYSTEM_CHARSET |
890                                                QT_SHOW_SELECT_NUMBER |
891                                                QT_NO_DEFAULT_DB));
892     subst_gc.add_utf8(group_list ? "resulting_GROUP_BY" :
893                       "resulting_ORDER_BY",
894                       str.ptr(), str.length());
895   }
896   return changed;
897 }
898 
899 
900 /**
901    Sets the plan's state of the JOIN. This is always the final step of
902    optimization; starting from this call, we expose the plan to other
903    connections (via EXPLAIN CONNECTION) so the plan has to be final.
904    QEP_TAB's quick_optim, condition_optim and keyread_optim are set here.
905 */
set_plan_state(enum_plan_state plan_state_arg)906 void JOIN::set_plan_state(enum_plan_state plan_state_arg)
907 {
908   // A plan should not change to another plan:
909   assert(plan_state_arg == NO_PLAN || plan_state == NO_PLAN);
910   if (plan_state == NO_PLAN && plan_state_arg != NO_PLAN)
911   {
912     if (qep_tab != NULL)
913     {
914       /*
915         We want to cover primary tables, tmp tables (they may have a sort, so
916         their "quick" and "condition" may change when execution runs the
917         sort), and sj-mat inner tables. Note that make_tmp_tables_info() may
918         have added a sort to the first non-const primary table, so it's
919         important to do those assignments after make_tmp_tables_info().
920       */
921       for (uint i= const_tables; i < tables; ++i)
922       {
923         qep_tab[i].set_quick_optim();
924         qep_tab[i].set_condition_optim();
925         qep_tab[i].set_keyread_optim();
926       }
927     }
928   }
929 
930   DEBUG_SYNC(thd, "before_set_plan");
931 
932   // If SQLCOM_END, no thread is explaining our statement anymore.
933   const bool need_lock= thd->query_plan.get_command() != SQLCOM_END;
934 
935   if (need_lock)
936     thd->lock_query_plan();
937   plan_state= plan_state_arg;
938   if (need_lock)
939     thd->unlock_query_plan();
940 }
941 
942 
alloc_qep(uint n)943 bool JOIN::alloc_qep(uint n)
944 {
945   // Just to be sure that type plan_idx is wide enough:
946   compile_time_assert(MAX_TABLES <= INT_MAX8);
947 
948   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
949 
950   qep_tab= new(thd->mem_root) QEP_TAB[n];
951   if (!qep_tab)
952     return true;                                /* purecov: inspected */
953   for (uint i= 0; i < n; ++i)
954     qep_tab[i].init(best_ref[i]);
955   return false;
956 }
957 
958 
init(JOIN_TAB * jt)959 void QEP_TAB::init(JOIN_TAB *jt)
960 {
961   jt->share_qs(this);
962   set_table(table()); // to update table()->reginfo.qep_tab
963   table_ref= jt->table_ref;
964 }
965 
966 
967 /// @returns semijoin strategy for this table.
get_sj_strategy() const968 uint QEP_TAB::get_sj_strategy() const
969 {
970   if (first_sj_inner() == NO_PLAN_IDX)
971     return SJ_OPT_NONE;
972   const uint s= join()->qep_tab[first_sj_inner()].position()->sj_strategy;
973   assert(s != SJ_OPT_NONE);
974   return s;
975 }
976 
977 /**
978   Return the index used for a table in a QEP
979 
980   The various access methods have different places where the index/key
981   number is stored, so this function is needed to return the correct value.
982 
983   @returns index number, or MAX_KEY if not applicable.
984 
985   JT_SYSTEM and JT_ALL does not use an index, and will always return MAX_KEY.
986 
987   JT_INDEX_MERGE supports more than one index. Hence MAX_KEY is returned and
988   a further inspection is needed.
989 */
effective_index() const990 uint QEP_TAB::effective_index() const
991 {
992   switch (type())
993   {
994   case JT_SYSTEM:
995     assert(ref().key == -1);
996     return MAX_KEY;
997 
998   case JT_CONST:
999   case JT_EQ_REF:
1000   case JT_REF_OR_NULL:
1001   case JT_REF:
1002     assert(ref().key != -1);
1003     return uint(ref().key);
1004 
1005   case JT_INDEX_SCAN:
1006   case JT_FT:
1007     return index();
1008 
1009   case JT_INDEX_MERGE:
1010     assert(quick()->index == MAX_KEY);
1011     return MAX_KEY;
1012 
1013   case JT_RANGE:
1014     return quick()->index;
1015 
1016   case JT_ALL:
1017   default:
1018     // @todo Check why JT_UNKNOWN is a valid value here.
1019     assert(type() == JT_ALL || type() == JT_UNKNOWN);
1020     return MAX_KEY;
1021   }
1022 }
1023 
get_sj_strategy() const1024 uint JOIN_TAB::get_sj_strategy() const
1025 {
1026   if (first_sj_inner() == NO_PLAN_IDX)
1027     return SJ_OPT_NONE;
1028   ASSERT_BEST_REF_IN_JOIN_ORDER(join());
1029   JOIN_TAB *tab= join()->best_ref[first_sj_inner()];
1030   uint s= tab->position()->sj_strategy;
1031   assert(s != SJ_OPT_NONE);
1032   return s;
1033 }
1034 
1035 
replace_index_subquery()1036 int JOIN::replace_index_subquery()
1037 {
1038   DBUG_ENTER("replace_index_subquery");
1039   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
1040 
1041   if (group_list ||
1042       !(unit->item && unit->item->substype() == Item_subselect::IN_SUBS) ||
1043       primary_tables != 1 || !where_cond ||
1044       unit->is_union())
1045     DBUG_RETURN(0);
1046 
1047   // Guaranteed by remove_redundant_subquery_clauses():
1048   assert(order == NULL && !select_distinct);
1049 
1050   subselect_engine *engine= NULL;
1051   Item_in_subselect * const in_subs=
1052     static_cast<Item_in_subselect *>(unit->item);
1053   enum join_type type= JT_UNKNOWN;
1054 
1055   JOIN_TAB *const first_join_tab= best_ref[0];
1056 
1057   if (in_subs->exec_method == Item_exists_subselect::EXEC_MATERIALIZATION)
1058   {
1059     // We cannot have two engines at the same time
1060   }
1061   else if (having_cond == NULL)
1062   {
1063     if (first_join_tab->type() == JT_EQ_REF &&
1064         first_join_tab->ref().items[0]->item_name.ptr() == in_left_expr_name)
1065     {
1066       type= JT_UNIQUE_SUBQUERY;
1067       /*
1068         This uses test_if_ref(), which needs access to JOIN_TAB::join_cond() so
1069         it must be done before we get rid of JOIN_TAB.
1070       */
1071       remove_subq_pushed_predicates();
1072     }
1073     else if (first_join_tab->type() == JT_REF &&
1074              first_join_tab->ref().items[0]->item_name.ptr() == in_left_expr_name)
1075     {
1076       type= JT_INDEX_SUBQUERY;
1077       remove_subq_pushed_predicates();
1078     }
1079   }
1080   else if (first_join_tab->type() == JT_REF_OR_NULL &&
1081            first_join_tab->ref().items[0]->item_name.ptr() == in_left_expr_name &&
1082            having_cond->item_name.ptr() == in_having_cond)
1083   {
1084     type= JT_INDEX_SUBQUERY;
1085     where_cond= remove_additional_cond(where_cond);
1086   }
1087 
1088   if (type == JT_UNKNOWN)
1089     DBUG_RETURN(0);
1090 
1091   if (alloc_qep(tables))
1092     DBUG_RETURN(-1);                            /* purecov: inspected */
1093   unplug_join_tabs();
1094 
1095   error= 0;
1096   QEP_TAB *const first_qep_tab= &qep_tab[0];
1097 
1098   if (first_qep_tab->table()->covering_keys.is_set(first_qep_tab->ref().key))
1099   {
1100     assert(!first_qep_tab->table()->no_keyread);
1101     first_qep_tab->table()->set_keyread(true);
1102   }
1103   // execution uses where_cond:
1104   first_qep_tab->set_condition(where_cond);
1105 
1106   engine=
1107     new subselect_indexsubquery_engine(thd, first_qep_tab, unit->item,
1108                                        where_cond,
1109                                        having_cond,
1110                                        // check_null
1111                                        first_qep_tab->type() == JT_REF_OR_NULL,
1112                                        // unique
1113                                        type == JT_UNIQUE_SUBQUERY);
1114   /**
1115      @todo If having_cond!=NULL we pass unique=false. But for this query:
1116      (oe1, oe2) IN (SELECT primary_key, non_key_maybe_null_field FROM tbl)
1117      we could use "unique=true" for the first index component and let
1118      Item_is_not_null_test(non_key_maybe_null_field) handle the second.
1119   */
1120 
1121   first_qep_tab->set_type(type);
1122 
1123   if (!unit->item->change_engine(engine))
1124     DBUG_RETURN(1);
1125   else // error:
1126     DBUG_RETURN(-1);                            /* purecov: inspected */
1127 }
1128 
1129 
optimize_distinct_group_order()1130 bool JOIN::optimize_distinct_group_order()
1131 {
1132   DBUG_ENTER("optimize_distinct_group_order");
1133   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
1134 
1135   /* Optimize distinct away if possible */
1136   {
1137     ORDER *org_order= order;
1138     order= ORDER_with_src(remove_const(order, where_cond, 1, &simple_order,
1139                                        "ORDER BY"),
1140                           order.src);
1141     if (thd->is_error())
1142     {
1143       error= 1;
1144       DBUG_PRINT("error",("Error from remove_const"));
1145       DBUG_RETURN(true);
1146     }
1147 
1148     /*
1149       If we are using ORDER BY NULL or ORDER BY const_expression,
1150       return result in any order (even if we are using a GROUP BY)
1151     */
1152     if (!order && org_order)
1153       skip_sort_order= 1;
1154   }
1155   /*
1156      Check if we can optimize away GROUP BY/DISTINCT.
1157      We can do that if there are no aggregate functions, the
1158      fields in DISTINCT clause (if present) and/or columns in GROUP BY
1159      (if present) contain direct references to all key parts of
1160      an unique index (in whatever order) and if the key parts of the
1161      unique index cannot contain NULLs.
1162      Note that the unique keys for DISTINCT and GROUP BY should not
1163      be the same (as long as they are unique).
1164 
1165      The FROM clause must contain a single non-constant table.
1166 
1167      @todo Apart from the LIS test, every condition depends only on facts
1168      which can be known in SELECT_LEX::prepare(), possibly this block should
1169      move there.
1170   */
1171 
1172   JOIN_TAB *const tab= best_ref[const_tables];
1173 
1174   if (plan_is_single_table() &&
1175       (group_list || select_distinct) &&
1176       !tmp_table_param.sum_func_count &&
1177       (!tab->quick() ||
1178        tab->quick()->get_type() !=
1179        QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX))
1180   {
1181     if (group_list && rollup.state == ROLLUP::STATE_NONE &&
1182        list_contains_unique_index(tab,
1183                                  find_field_in_order_list,
1184                                  (void *) group_list))
1185     {
1186       /*
1187         We have found that grouping can be removed since groups correspond to
1188         only one row anyway, but we still have to guarantee correct result
1189         order. The line below effectively rewrites the query from GROUP BY
1190         <fields> to ORDER BY <fields>. There are three exceptions:
1191         - if skip_sort_order is set (see above), then we can simply skip
1192           GROUP BY;
1193         - if IN(subquery), likewise (see remove_redundant_subquery_clauses())
1194         - we can only rewrite ORDER BY if the ORDER BY fields are 'compatible'
1195           with the GROUP BY ones, i.e. either one is a prefix of another.
1196           We only check if the ORDER BY is a prefix of GROUP BY. In this case
1197           test_if_subpart() copies the ASC/DESC attributes from the original
1198           ORDER BY fields.
1199           If GROUP BY is a prefix of ORDER BY, then it is safe to leave
1200           'order' as is.
1201        */
1202       if (!order || test_if_subpart(group_list, order))
1203         order= (skip_sort_order ||
1204                 (unit->item && unit->item->substype() ==
1205                  Item_subselect::IN_SUBS)) ? NULL : group_list;
1206 
1207       /*
1208         If we have an IGNORE INDEX FOR GROUP BY(fields) clause, this must be
1209         rewritten to IGNORE INDEX FOR ORDER BY(fields).
1210       */
1211       best_ref[0]->table()->keys_in_use_for_order_by=
1212         best_ref[0]->table()->keys_in_use_for_group_by;
1213       group_list= 0;
1214       grouped= false;
1215     }
1216     if (select_distinct &&
1217        list_contains_unique_index(tab,
1218                                  find_field_in_item_list,
1219                                  (void *) &fields_list))
1220     {
1221       select_distinct= 0;
1222     }
1223   }
1224   if (!(group_list || tmp_table_param.sum_func_count) &&
1225       select_distinct &&
1226       plan_is_single_table() &&
1227       rollup.state == ROLLUP::STATE_NONE)
1228   {
1229     /*
1230       We are only using one table. In this case we change DISTINCT to a
1231       GROUP BY query if:
1232       - The GROUP BY can be done through indexes (no sort) and the ORDER
1233         BY only uses selected fields.
1234 	(In this case we can later optimize away GROUP BY and ORDER BY)
1235       - We are scanning the whole table without LIMIT
1236         This can happen if:
1237         - We are using CALC_FOUND_ROWS
1238         - We are using an ORDER BY that can't be optimized away.
1239 
1240       We don't want to use this optimization when we are using LIMIT
1241       because in this case we can just create a temporary table that
1242       holds LIMIT rows and stop when this table is full.
1243     */
1244     if (order)
1245     {
1246       skip_sort_order=
1247         test_if_skip_sort_order(tab, order, m_select_limit,
1248                                 true,           // no_changes
1249                                 &tab->table()->keys_in_use_for_order_by,
1250                                 "ORDER BY");
1251       count_field_types(select_lex, &tmp_table_param, all_fields, false, false);
1252     }
1253     ORDER *o;
1254     bool all_order_fields_used;
1255     if ((o= create_distinct_group(thd, ref_ptrs,
1256                                   order, fields_list, all_fields,
1257 				  &all_order_fields_used)))
1258     {
1259       group_list= ORDER_with_src(o, ESC_DISTINCT);
1260       const bool skip_group=
1261         skip_sort_order &&
1262         test_if_skip_sort_order(tab, group_list, m_select_limit,
1263                                 true,         // no_changes
1264                                 &tab->table()->keys_in_use_for_group_by,
1265                                 "GROUP BY");
1266       count_field_types(select_lex, &tmp_table_param, all_fields, false, false);
1267       if ((skip_group && all_order_fields_used) ||
1268 	  m_select_limit == HA_POS_ERROR ||
1269 	  (order && !skip_sort_order))
1270       {
1271 	/*  Change DISTINCT to GROUP BY */
1272 	select_distinct= 0;
1273 	no_order= !order;
1274 	if (all_order_fields_used)
1275 	{
1276 	  if (order && skip_sort_order)
1277 	  {
1278 	    /*
1279 	      Force MySQL to read the table in sorted order to get result in
1280 	      ORDER BY order.
1281 	    */
1282 	    tmp_table_param.quick_group=0;
1283 	  }
1284 	  order=0;
1285         }
1286         grouped= true;                    // For end_write_group
1287       }
1288       else
1289 	group_list= 0;
1290     }
1291     else if (thd->is_fatal_error)         // End of memory
1292       DBUG_RETURN(true);
1293   }
1294   simple_group= 0;
1295   {
1296     ORDER *old_group_list= group_list;
1297     group_list= ORDER_with_src(remove_const(group_list, where_cond,
1298                                             rollup.state == ROLLUP::STATE_NONE,
1299                                             &simple_group, "GROUP BY"),
1300                                group_list.src);
1301 
1302     if (thd->is_error())
1303     {
1304       error= 1;
1305       DBUG_PRINT("error",("Error from remove_const"));
1306       DBUG_RETURN(true);
1307     }
1308     if (old_group_list && !group_list)
1309       select_distinct= 0;
1310   }
1311   if (!group_list && grouped)
1312   {
1313     order=0;					// The output has only one row
1314     simple_order=1;
1315     select_distinct= 0;                       // No need in distinct for 1 row
1316     group_optimized_away= 1;
1317   }
1318 
1319   calc_group_buffer(this, group_list);
1320   send_group_parts= tmp_table_param.group_parts; /* Save org parts */
1321 
1322   if (test_if_subpart(group_list, order) ||
1323       (!group_list && tmp_table_param.sum_func_count))
1324   {
1325     order=0;
1326     if (is_indexed_agg_distinct(this, NULL))
1327       sort_and_group= 0;
1328   }
1329 
1330   DBUG_RETURN(false);
1331 }
1332 
1333 
test_skip_sort()1334 void JOIN::test_skip_sort()
1335 {
1336   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
1337   JOIN_TAB *const tab= best_ref[const_tables];
1338 
1339   assert(ordered_index_usage == ordered_index_void);
1340 
1341   if (group_list)   // GROUP BY honoured first
1342                     // (DISTINCT was rewritten to GROUP BY if skippable)
1343   {
1344     /*
1345       When there is SQL_BIG_RESULT do not sort using index for GROUP BY,
1346       and thus force sorting on disk unless a group min-max optimization
1347       is going to be used as it is applied now only for one table queries
1348       with covering indexes.
1349     */
1350     if (!(select_lex->active_options() & SELECT_BIG_RESULT || with_json_agg) ||
1351         (tab->quick() &&
1352          tab->quick()->get_type() ==
1353            QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX))
1354     {
1355       if (simple_group &&              // GROUP BY is possibly skippable
1356           !select_distinct)            // .. if not preceded by a DISTINCT
1357       {
1358         /*
1359           Calculate a possible 'limit' of table rows for 'GROUP BY':
1360           A specified 'LIMIT' is relative to the final resultset.
1361           'need_tmp' implies that there will be more postprocessing
1362           so the specified 'limit' should not be enforced yet.
1363          */
1364         const ha_rows limit = need_tmp ? HA_POS_ERROR : m_select_limit;
1365 
1366         if (test_if_skip_sort_order(tab, group_list, limit, false,
1367                                     &tab->table()->keys_in_use_for_group_by,
1368                                     "GROUP BY"))
1369         {
1370           ordered_index_usage= ordered_index_group_by;
1371         }
1372       }
1373 
1374       /*
1375         If we are going to use semi-join LooseScan, it will depend
1376         on the selected index scan to be used.  If index is not used
1377         for the GROUP BY, we risk that sorting is put on the LooseScan
1378         table.  In order to avoid this, force use of temporary table.
1379         TODO: Explain the quick_group part of the test below.
1380        */
1381       if ((ordered_index_usage != ordered_index_group_by) &&
1382           (tmp_table_param.quick_group ||
1383            (tab->emb_sj_nest &&
1384             tab->position()->sj_strategy == SJ_OPT_LOOSE_SCAN)))
1385       {
1386         need_tmp= true;
1387         simple_order= simple_group= false; // Force tmp table without sort
1388       }
1389     }
1390   }
1391   else if (order &&                      // ORDER BY wo/ preceding GROUP BY
1392            (simple_order || skip_sort_order)) // which is possibly skippable
1393   {
1394     if (test_if_skip_sort_order(tab, order, m_select_limit, false,
1395                                 &tab->table()->keys_in_use_for_order_by,
1396                                 "ORDER BY"))
1397     {
1398       ordered_index_usage= ordered_index_order_by;
1399     }
1400   }
1401 }
1402 
1403 
1404 /**
1405   Test if ORDER BY is a single MATCH function(ORDER BY MATCH)
1406   and sort order is descending.
1407 
1408   @param order                 pointer to ORDER struct.
1409 
1410   @retval
1411     Pointer to MATCH function if order is 'ORDER BY MATCH() DESC'
1412   @retval
1413     NULL otherwise
1414 */
1415 
test_if_ft_index_order(ORDER * order)1416 static Item_func_match *test_if_ft_index_order(ORDER *order)
1417 {
1418   if (order && order->next == NULL &&
1419       order->direction == ORDER::ORDER_DESC &&
1420       (*order->item)->type() == Item::FUNC_ITEM &&
1421       ((Item_func*) (*order->item))->functype() == Item_func::FT_FUNC)
1422     return static_cast<Item_func_match*> (*order->item)->get_master();
1423 
1424   return NULL;
1425 }
1426 
1427 /**
1428   Test if this is a prefix index.
1429 
1430   @param   table     table
1431   @param   idx       index to check
1432 
1433   @return TRUE if this is a prefix index
1434 */
is_prefix_index(TABLE * table,uint idx)1435 bool is_prefix_index(TABLE* table, uint idx)
1436 {
1437   if (!table || !table->key_info)
1438   {
1439     return false;
1440   }
1441   KEY* key_info = table->key_info;
1442   uint key_parts = key_info[idx].user_defined_key_parts;
1443   KEY_PART_INFO* key_part = key_info[idx].key_part;
1444 
1445   for (uint i = 0; i < key_parts; i++, key_part++)
1446   {
1447     if (key_part->field &&
1448       (key_part->length !=
1449         table->field[key_part->fieldnr - 1]->key_length() &&
1450         !(key_info->flags & (HA_FULLTEXT | HA_SPATIAL))))
1451     {
1452       return true;
1453     }
1454   }
1455   return false;
1456 }
1457 
1458 /**
1459   Test if one can use the key to resolve ordering.
1460 
1461   @param order               Sort order
1462   @param table               Table to sort
1463   @param idx                 Index to check
1464   @param[out] used_key_parts NULL by default, otherwise return value for
1465                              used key parts.
1466 
1467   @note
1468     used_key_parts is set to correct key parts used if return value != 0
1469     (On other cases, used_key_part may be changed)
1470     Note that the value may actually be greater than the number of index
1471     key parts. This can happen for storage engines that have the primary
1472     key parts as a suffix for every secondary key.
1473 
1474   @retval
1475     1   key is ok.
1476   @retval
1477     0   Key can't be used
1478   @retval
1479     -1   Reverse key can be used
1480 */
1481 
test_if_order_by_key(ORDER * order,TABLE * table,uint idx,uint * used_key_parts)1482 int test_if_order_by_key(ORDER *order, TABLE *table, uint idx,
1483                          uint *used_key_parts)
1484 {
1485   KEY_PART_INFO *key_part,*key_part_end;
1486   key_part=table->key_info[idx].key_part;
1487   key_part_end=key_part+table->key_info[idx].user_defined_key_parts;
1488   key_part_map const_key_parts=table->const_key_parts[idx];
1489   int reverse=0;
1490   uint key_parts;
1491   my_bool on_pk_suffix= FALSE;
1492   DBUG_ENTER("test_if_order_by_key");
1493 
1494   for (; order ; order=order->next, const_key_parts>>=1)
1495   {
1496 
1497     /*
1498       Since only fields can be indexed, ORDER BY <something> that is
1499       not a field cannot be resolved by using an index.
1500     */
1501     Item *real_itm= (*order->item)->real_item();
1502     if (real_itm->type() != Item::FIELD_ITEM)
1503       DBUG_RETURN(0);
1504 
1505     Field *field= static_cast<Item_field*>(real_itm)->field;
1506     int flag;
1507 
1508     /*
1509       Skip key parts that are constants in the WHERE clause.
1510       These are already skipped in the ORDER BY by const_expression_in_where()
1511     */
1512     for (; const_key_parts & 1 && key_part < key_part_end ;
1513          const_key_parts>>= 1)
1514       key_part++;
1515 
1516     /* Avoid usage of prefix index for sorting a partition table */
1517     if (table->part_info && key_part != table->key_info[idx].key_part &&
1518         key_part != key_part_end && is_prefix_index(table, idx))
1519      DBUG_RETURN(0);
1520 
1521     if (key_part == key_part_end)
1522     {
1523       /*
1524         We are at the end of the key. Check if the engine has the primary
1525         key as a suffix to the secondary keys. If it has continue to check
1526         the primary key as a suffix.
1527       */
1528       if (!on_pk_suffix &&
1529           (table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX) &&
1530           table->s->primary_key != MAX_KEY &&
1531           table->s->primary_key != idx)
1532       {
1533         on_pk_suffix= TRUE;
1534         key_part= table->key_info[table->s->primary_key].key_part;
1535         key_part_end=key_part +
1536           table->key_info[table->s->primary_key].user_defined_key_parts;
1537         const_key_parts=table->const_key_parts[table->s->primary_key];
1538 
1539         for (; const_key_parts & 1 ; const_key_parts>>= 1)
1540           key_part++;
1541         /*
1542          The primary and secondary key parts were all const (i.e. there's
1543          one row).  The sorting doesn't matter.
1544         */
1545         if (key_part == key_part_end && reverse == 0)
1546         {
1547           key_parts= 0;
1548           reverse= 1;
1549           goto ok;
1550         }
1551       }
1552       else
1553         DBUG_RETURN(0);
1554     }
1555 
1556     if (key_part->field != field || !field->part_of_sortkey.is_set(idx))
1557       DBUG_RETURN(0);
1558 
1559     const ORDER::enum_order keypart_order=
1560       (key_part->key_part_flag & HA_REVERSE_SORT) ?
1561       ORDER::ORDER_DESC : ORDER::ORDER_ASC;
1562     /* set flag to 1 if we can use read-next on key, else to -1 */
1563     flag= (order->direction == keypart_order) ? 1 : -1;
1564     if (reverse && flag != reverse)
1565       DBUG_RETURN(0);
1566     reverse=flag;				// Remember if reverse
1567     key_part++;
1568   }
1569   if (on_pk_suffix)
1570   {
1571     uint used_key_parts_secondary= table->key_info[idx].user_defined_key_parts;
1572     uint used_key_parts_pk=
1573       (uint) (key_part - table->key_info[table->s->primary_key].key_part);
1574     key_parts= used_key_parts_pk + used_key_parts_secondary;
1575 
1576     if (reverse == -1 &&
1577         (!(table->file->index_flags(idx, used_key_parts_secondary - 1, 1) &
1578            HA_READ_PREV) ||
1579          !(table->file->index_flags(table->s->primary_key,
1580                                     used_key_parts_pk - 1, 1) & HA_READ_PREV)))
1581       reverse= 0;                               // Index can't be used
1582   }
1583   else
1584   {
1585     key_parts= (uint) (key_part - table->key_info[idx].key_part);
1586     if (reverse == -1 &&
1587         !(table->file->index_flags(idx, key_parts-1, 1) & HA_READ_PREV))
1588       reverse= 0;                               // Index can't be used
1589   }
1590 ok:
1591   if (used_key_parts != NULL)
1592     *used_key_parts= key_parts;
1593   DBUG_RETURN(reverse);
1594 }
1595 
1596 
1597 /**
1598   Find shortest key suitable for full table scan.
1599 
1600   @param table                 Table to scan
1601   @param usable_keys           Allowed keys
1602 
1603   @note
1604      As far as
1605      1) clustered primary key entry data set is a set of all record
1606         fields (key fields and not key fields) and
1607      2) secondary index entry data is a union of its key fields and
1608         primary key fields (at least InnoDB and its derivatives don't
1609         duplicate primary key fields there, even if the primary and
1610         the secondary keys have a common subset of key fields),
1611      then secondary index entry data is always a subset of primary key entry.
1612      Unfortunately, key_info[nr].key_length doesn't show the length
1613      of key/pointer pair but a sum of key field lengths only, thus
1614      we can't estimate index IO volume comparing only this key_length
1615      value of secondary keys and clustered PK.
1616      So, try secondary keys first, and choose PK only if there are no
1617      usable secondary covering keys or found best secondary key include
1618      all table fields (i.e. same as PK):
1619 
1620   @return
1621     MAX_KEY     no suitable key found
1622     key index   otherwise
1623 */
1624 
find_shortest_key(TABLE * table,const key_map * usable_keys)1625 uint find_shortest_key(TABLE *table, const key_map *usable_keys)
1626 {
1627   uint best= MAX_KEY;
1628   uint usable_clustered_pk= (table->file->primary_key_is_clustered() &&
1629                              table->s->primary_key != MAX_KEY &&
1630                              usable_keys->is_set(table->s->primary_key)) ?
1631                             table->s->primary_key : MAX_KEY;
1632   if (!usable_keys->is_clear_all())
1633   {
1634     uint min_length= (uint) ~0;
1635     for (uint nr=0; nr < table->s->keys ; nr++)
1636     {
1637       if (nr == usable_clustered_pk)
1638         continue;
1639       if (usable_keys->is_set(nr))
1640       {
1641         /*
1642           Can not do full index scan on rtree index because it is not
1643           supported by Innodb, probably not supported by others either.
1644          */
1645         const KEY &key_ref= table->key_info[nr];
1646         if (key_ref.key_length < min_length &&
1647             !(key_ref.flags & HA_SPATIAL))
1648         {
1649           min_length=key_ref.key_length;
1650           best=nr;
1651         }
1652       }
1653     }
1654   }
1655   if (usable_clustered_pk != MAX_KEY)
1656   {
1657     /*
1658      If the primary key is clustered and found shorter key covers all table
1659      fields and is not clustering then primary key scan normally would be
1660      faster because amount of data to scan is the same but PK is clustered.
1661      It's safe to compare key parts with table fields since duplicate key
1662      parts aren't allowed.
1663      */
1664     if (best == MAX_KEY ||
1665         ((table->key_info[best].user_defined_key_parts >= table->s->fields)
1666          && !(table->file->index_flags(best, 0, 0) & HA_CLUSTERED_INDEX)))
1667       best= usable_clustered_pk;
1668   }
1669   return best;
1670 }
1671 
1672 /**
1673   Test if a second key is the subkey of the first one.
1674 
1675   @param key_part              First key parts
1676   @param ref_key_part          Second key parts
1677   @param ref_key_part_end      Last+1 part of the second key
1678 
1679   @note
1680     Second key MUST be shorter than the first one.
1681 
1682   @retval
1683     1	is a subkey
1684   @retval
1685     0	no sub key
1686 */
1687 
1688 inline bool
is_subkey(KEY_PART_INFO * key_part,KEY_PART_INFO * ref_key_part,KEY_PART_INFO * ref_key_part_end)1689 is_subkey(KEY_PART_INFO *key_part, KEY_PART_INFO *ref_key_part,
1690 	  KEY_PART_INFO *ref_key_part_end)
1691 {
1692   for (; ref_key_part < ref_key_part_end; key_part++, ref_key_part++)
1693     if (!key_part->field->eq(ref_key_part->field))
1694       return 0;
1695   return 1;
1696 }
1697 
1698 
1699 /**
1700   Test if REF_OR_NULL optimization will be used if the specified
1701   ref_key is used for REF-access to 'tab'
1702 
1703   @retval
1704     true	JT_REF_OR_NULL will be used
1705   @retval
1706     false	no JT_REF_OR_NULL access
1707 */
1708 
1709 static bool
is_ref_or_null_optimized(const JOIN_TAB * tab,uint ref_key)1710 is_ref_or_null_optimized(const JOIN_TAB *tab, uint ref_key)
1711 {
1712   if (tab->keyuse())
1713   {
1714     const Key_use *keyuse= tab->keyuse();
1715     while (keyuse->key != ref_key && keyuse->table_ref == tab->table_ref)
1716       keyuse++;
1717 
1718     const table_map const_tables= tab->join()->const_table_map;
1719     while (keyuse->key == ref_key && keyuse->table_ref == tab->table_ref)
1720     {
1721       if (!(keyuse->used_tables & ~const_tables))
1722       {
1723         if (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)
1724           return true;
1725       }
1726       keyuse++;
1727     }
1728   }
1729   return false;
1730 }
1731 
1732 
1733 /**
1734   Test if we can use one of the 'usable_keys' instead of 'ref' key
1735   for sorting.
1736 
1737   @param ref			Number of key, used for WHERE clause
1738   @param usable_keys		Keys for testing
1739 
1740   @return
1741     - MAX_KEY			If we can't use other key
1742     - the number of found key	Otherwise
1743 */
1744 
1745 static uint
test_if_subkey(ORDER * order,JOIN_TAB * tab,uint ref,uint ref_key_parts,const key_map * usable_keys)1746 test_if_subkey(ORDER *order, JOIN_TAB *tab, uint ref, uint ref_key_parts,
1747 	       const key_map *usable_keys)
1748 {
1749   uint nr;
1750   uint min_length= (uint) ~0;
1751   uint best= MAX_KEY;
1752   TABLE *table= tab->table();
1753   KEY_PART_INFO *ref_key_part= table->key_info[ref].key_part;
1754   KEY_PART_INFO *ref_key_part_end= ref_key_part + ref_key_parts;
1755 
1756   for (nr= 0 ; nr < table->s->keys ; nr++)
1757   {
1758     if (usable_keys->is_set(nr) &&
1759 	table->key_info[nr].key_length < min_length &&
1760 	table->key_info[nr].user_defined_key_parts >= ref_key_parts &&
1761 	is_subkey(table->key_info[nr].key_part, ref_key_part,
1762 		  ref_key_part_end) &&
1763         !is_ref_or_null_optimized(tab, nr) &&
1764 	test_if_order_by_key(order, table, nr))
1765     {
1766       min_length= table->key_info[nr].key_length;
1767       best= nr;
1768     }
1769   }
1770   return best;
1771 }
1772 
1773 
1774 /**
1775   It is not obvious to see that test_if_skip_sort_order() never changes the
1776   plan if no_changes is true. So we double-check: creating an instance of this
1777   class saves some important access-path-related information of the current
1778   table; when the instance is destroyed, the latest access-path information is
1779   compared with saved data.
1780 */
1781 
1782 class Plan_change_watchdog
1783 {
1784 #ifndef NDEBUG
1785 public:
1786   /**
1787     @param tab_arg     table whose access path is being determined
1788     @param no_changes  whether a change to the access path is allowed
1789   */
Plan_change_watchdog(const JOIN_TAB * tab_arg,const bool no_changes_arg)1790   Plan_change_watchdog(const JOIN_TAB *tab_arg, const bool no_changes_arg)
1791   {
1792     // Only to keep gcc 4.1.2-44 silent about uninitialized variables
1793     quick= NULL;
1794     quick_index= 0;
1795     if (no_changes_arg)
1796     {
1797       tab= tab_arg;
1798       type= tab->type();
1799       if ((quick= tab->quick()))
1800         quick_index= quick->index;
1801       use_quick= tab->use_quick;
1802       ref_key= tab->ref().key;
1803       ref_key_parts= tab->ref().key_parts;
1804       index= tab->index();
1805     }
1806     else
1807     {
1808       tab= NULL;
1809       // Only to keep gcc 4.1.2-44 silent about uninitialized variables
1810       type= JT_UNKNOWN;
1811       quick= NULL;
1812       ref_key= ref_key_parts= index= 0;
1813       use_quick= QS_NONE;
1814     }
1815   }
~Plan_change_watchdog()1816   ~Plan_change_watchdog()
1817   {
1818     if (tab == NULL)
1819       return;
1820     // changes are not allowed, we verify:
1821     assert(tab->type() == type);
1822     assert(tab->quick() == quick);
1823     assert((quick == NULL) || tab->quick()->index == quick_index);
1824     assert(tab->use_quick == use_quick);
1825     assert(tab->ref().key == ref_key);
1826     assert(tab->ref().key_parts == ref_key_parts);
1827     assert(tab->index() == index);
1828   }
1829 private:
1830   const JOIN_TAB *tab;            ///< table, or NULL if changes are allowed
1831   enum join_type type;            ///< copy of tab->type()
1832   // "Range / index merge" info
1833   const QUICK_SELECT_I *quick;    ///< copy of tab->select->quick
1834   uint quick_index;               ///< copy of tab->select->quick->index
1835   enum quick_type use_quick;      ///< copy of tab->use_quick
1836   // "ref access" info
1837   int ref_key;                    ///< copy of tab->ref().key
1838   uint ref_key_parts;/// copy of tab->ref().key_parts
1839   // Other index-related info
1840   uint index;                     ///< copy of tab->index
1841 #else // in non-debug build, empty class
1842 public:
1843   Plan_change_watchdog(const JOIN_TAB *tab_arg, const bool no_changes_arg) {}
1844 #endif
1845 };
1846 
1847 
/**
  Test if we can skip ordering by using an index.

  If the current plan is to use an index that provides ordering, the
  plan will not be changed. Otherwise, if an index can be used, the
  access path stored in the JOIN_TAB (type, index, ref and quick select)
  is changed to use the index, unless no_changes is true.

  The index must cover all fields in <order>, or it will not be considered.

  @param tab           JOIN_TAB of the accessed table. Must not be NULL (it
                       is dereferenced unconditionally) and must be the first
                       non-const table of the join (asserted).
  @param order         Linked list of ORDER BY arguments
  @param select_limit  LIMIT value, or HA_POS_ERROR if no limit
  @param no_changes    No changes will be made to the query plan. In debug
                       builds this is verified by Plan_change_watchdog.
  @param map           key_map of applicable indexes.
  @param clause_type   "ORDER BY" etc for printing in optimizer trace

  @todo
    - sergeyp: Results of all index merge selects actually are ordered
    by clustered PK values.

  @note
  This function may change tmp_table_param.precomputed_group_by. This
  affects how create_tmp_table() treats aggregation functions, so
  count_field_types() must be called again to make sure this is taken
  into consideration.

  @note
  NOTE(review): the full-text branch near the top of the function updates
  the access path of 'tab' without testing no_changes; confirm that callers
  never reach that branch with no_changes == true.

  @retval
    false  We have to use filesort to do the sorting
  @retval
    true   We can use an index, or the table is accessed with
           JT_EQ_REF / JT_CONST / JT_SYSTEM and sorting is skipped.
*/

static bool
test_if_skip_sort_order(JOIN_TAB *tab, ORDER *order, ha_rows select_limit,
                        const bool no_changes, const key_map *map,
                        const char *clause_type)
{
  // Index used by the current access path, or -1 if there is none
  int ref_key;
  uint ref_key_parts= 0;
  // 1 = ascending, -1 = descending, 0 = not determined (see trace output)
  int order_direction= 0;
  uint used_key_parts;
  TABLE *const table= tab->table();
  JOIN *const join= tab->join();
  THD *const thd= join->thd;
  // Quick select on entry; restored at fix_ICP if sorting cannot be skipped
  QUICK_SELECT_I *const save_quick= tab->quick();
  // Index picked by test_if_cheaper_ordering(), or -1
  int best_key= -1;
  bool set_up_ref_access_to_key= false;
  bool can_skip_sorting= false;                  // used as return value
  // Index we decided to switch to, or -1 if the index is unchanged
  int changed_key= -1;
  DBUG_ENTER("test_if_skip_sort_order");

  /* Check that we are always called with first non-const table */
  assert((uint)tab->idx() == join->const_tables);

  Plan_change_watchdog watchdog(tab, no_changes);

  /* Sorting a single row can always be skipped */
  if (tab->type() == JT_EQ_REF ||
      tab->type() == JT_CONST  ||
      tab->type() == JT_SYSTEM)
  {
    DBUG_RETURN(1);
  }

  /*
    Check if FT index can be used to retrieve result in the required order.
    It is possible if ordering is on the first non-constant table.
  */
  if (join->order && join->simple_order)
  {
    /*
      Check if ORDER is DESC, ORDER BY is a single MATCH function.
    */
    Item_func_match *ft_func= test_if_ft_index_order(order);
    /*
      Two possible cases when we can skip sort order:
      1. FT_SORTED must be set(Natural mode, no ORDER BY).
      2. If FT_SORTED flag is not set then
      the engine should support deferred sorting. Deferred sorting means
      that sorting is postponed until the start of index reading (InnoDB).
      In this case we set FT_SORTED flag here to let the engine know that
      internal sorting is needed.
    */
    if (ft_func && ft_func->ft_handler && ft_func->ordered_result())
    {
      /*
        FT index scan is used, so the only additional requirement is
        that ORDER BY MATCH function is the same as the function that
        is used for FT index.
      */
      if (tab->type() == JT_FT &&
          ft_func->eq(tab->position()->key->val, true))
      {
        ft_func->set_hints(join, FT_SORTED, select_limit, false);
        DBUG_RETURN(true);
      }
      /*
        No index is used, it's possible to use FT index for ORDER BY if
        LIMIT is present and does not exceed count of the records in FT index
        and there is no WHERE condition since a condition may potentially
        require more rows to be fetched from FT index.
      */
      else if (!tab->condition() &&
               select_limit != HA_POS_ERROR &&
               select_limit <= ft_func->get_count())
      {
        /* test_if_ft_index_order() always returns master MATCH function. */
        assert(!ft_func->master);
        /* ref is not set since there is no WHERE condition */
        assert(tab->ref().key == -1);

        /* Make EXPLAIN happy */
        tab->set_type(JT_FT);
        tab->ref().key= ft_func->key;
        tab->ref().key_parts= 0;
        tab->set_index(ft_func->key);
        tab->set_ft_func(ft_func);

        /* Setup FT handler */
        ft_func->set_hints(join, FT_SORTED, select_limit, true);
        ft_func->join_key= true;
        table->file->ft_handler= ft_func->ft_handler;
        DBUG_RETURN(true);
      }
    }
  }

  /*
    Keys disabled by ALTER TABLE ... DISABLE KEYS should have already
    been taken into account.
  */
  key_map usable_keys= *map;

  /*
    Narrow usable_keys down to the indexes that every ORDER element is
    part of. Any element that is not a plain field rules out index ordering.
  */
  for (ORDER *tmp_order=order; tmp_order ; tmp_order=tmp_order->next)
  {
    Item *item= (*tmp_order->item)->real_item();
    if (item->type() != Item::FIELD_ITEM)
    {
      usable_keys.clear_all();
      DBUG_RETURN(0);
    }
    usable_keys.intersect(((Item_field*) item)->field->part_of_sortkey);
    if (usable_keys.is_clear_all())
      DBUG_RETURN(0);					// No usable keys
  }
  if (tab->type() == JT_REF_OR_NULL || tab->type() == JT_FT)
    DBUG_RETURN(0);

  ref_key= -1;
  /* Test if constant range in WHERE */
  if (tab->type() == JT_REF)
  {
    assert(tab->ref().key >= 0 && tab->ref().key_parts);
    ref_key=	   tab->ref().key;
    ref_key_parts= tab->ref().key_parts;
  }
  else if (tab->type() == JT_RANGE || tab->type() == JT_INDEX_MERGE)
  {
    // Range found by opt_range
    int quick_type= tab->quick()->get_type();
    /*
      assume results are not ordered when index merge is used
      TODO: sergeyp: Results of all index merge selects actually are ordered
      by clustered PK values.
    */

    if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE ||
        quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
        quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT)
      DBUG_RETURN(0);
    ref_key=	   tab->quick()->index;
    ref_key_parts= tab->quick()->used_key_parts;
  }
  else if (tab->type() == JT_INDEX_SCAN)
  {
    // The optimizer has decided to use an index scan.
    ref_key=       tab->index();
    ref_key_parts= actual_key_parts(&table->key_info[tab->index()]);
  }

  // From here on every exit goes through fix_ICP so the trace is completed.
  Opt_trace_context * const trace= &thd->opt_trace;
  Opt_trace_object trace_wrapper(trace);
  Opt_trace_object
    trace_skip_sort_order(trace, "reconsidering_access_paths_for_index_ordering");
  trace_skip_sort_order.add_alnum("clause", clause_type);
  Opt_trace_array trace_steps(trace, "steps");

  if (ref_key >= 0)
  {
    /*
      We come here when ref/index scan/range scan access has been set
      up for this table. Do not change access method if ordering is
      provided already.
    */
    if (!usable_keys.is_set(ref_key))
    {
      /*
        We come here when ref_key is not among usable_keys, try to find a
        usable prefix key of that key.
      */
      uint new_ref_key;
      /*
	If using index only read, only consider other possible index only
	keys
      */
      if (table->covering_keys.is_set(ref_key))
	usable_keys.intersect(table->covering_keys);

      if ((new_ref_key= test_if_subkey(order, tab, ref_key, ref_key_parts,
				       &usable_keys)) < MAX_KEY)
      {
	/* Found key that can be used to retrieve data in sorted order */
	if (tab->ref().key >= 0)
        {
          /*
            We'll use ref access method on key new_ref_key. The actual change
            is done further down in this function where we update the plan.
          */
          set_up_ref_access_to_key= true;
        }
	else if (!no_changes)
	{
          /*
            The range optimizer constructed QUICK_RANGE for ref_key, and
            we want to use instead new_ref_key as the index. We can't
            just change the index of the quick select, because this may
            result in an inconsistent QUICK_SELECT object. Below we
            create a new QUICK_SELECT from scratch so that all its
            parameters are set correctly by the range optimizer.

            Note that the range optimizer is NOT called if
            no_changes==true. The reason is that the range optimizer
            cannot find a QUICK that can return ordered result unless
            index access (ref or index scan) is also able to do so
            (which test_if_order_by_key () will tell).
            Admittedly, range access may be much more efficient than
            e.g. index scan, but the only thing that matters when
            no_changes==true is the answer to the question: "Is it
            possible to avoid sorting if an index is used to access
            this table?". The answer does not depend on the outcome of
            the range optimizer.
          */
          key_map new_ref_key_map;  // Force the creation of quick select
          new_ref_key_map.set_bit(new_ref_key); // only for new_ref_key.

          Opt_trace_object trace_wrapper(trace);
          Opt_trace_object
            trace_recest(trace, "rows_estimation");
          trace_recest.add_utf8_table(tab->table_ref).
          add_utf8("index", table->key_info[new_ref_key].name);
          QUICK_SELECT_I *qck;
          const bool no_quick=
            test_quick_select(thd, new_ref_key_map,
                              0,       // empty table_map
                              join->calc_found_rows ?
                                HA_POS_ERROR :
                                join->unit->select_limit_cnt,
                              false,   // don't force quick range
                              order->direction, tab,
                              // we are after make_join_select():
                              tab->condition(), &tab->needed_reg, &qck,
                              tab->table()->force_index) <= 0;
          assert(tab->quick() == save_quick);
          tab->set_quick(qck);
          if (no_quick)
          {
            // fix_ICP puts save_quick back since can_skip_sorting is false
            can_skip_sorting= false;
            goto fix_ICP;
          }
	}
        ref_key= new_ref_key;
        changed_key= new_ref_key;
      }
    }
    /* Check if we get the rows in requested sorted order by using the key */
    if (usable_keys.is_set(ref_key) &&
        (order_direction= test_if_order_by_key(order,table,ref_key,
					       &used_key_parts)))
      goto check_reverse_order;
  }
  {
    /*
      There is no ref/index scan/range scan access set up for this
      table, or it does not provide the requested ordering. Do a
      cost-based search on all keys.
    */
    uint best_key_parts= 0;
    uint saved_best_key_parts= 0;
    int best_key_direction= 0;
    ha_rows table_records= table->file->stats.records;

    /*
      If an index scan that cannot provide ordering has been selected
      then do not use the index scan key as starting hint to
      test_if_cheaper_ordering()
    */
    const int ref_key_hint= (order_direction == 0 &&
                             tab->type() == JT_INDEX_SCAN) ? -1 : ref_key;

    /*
      Does the query have a "FORCE INDEX [FOR GROUP BY] (idx)" (if
      clause is group by) or a "FORCE INDEX [FOR ORDER BY] (idx)" (if
      clause is order by)?
    */
    const bool is_group_by= join && join->grouped && order == join->group_list;
    const bool is_force_index= table->force_index ||
      (is_group_by ? table->force_index_group : table->force_index_order);

    /*
      Find an ordering index alternative over the chosen plan iff
      prefer_ordering_index switch is on. This switch is overridden only when
      force index for order/group is specified.
    */
    if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_PREFER_ORDERING_INDEX) ||
        is_force_index)
      test_if_cheaper_ordering(tab, order, table, usable_keys,
                               ref_key_hint,
                               select_limit,
                               &best_key, &best_key_direction,
                               &select_limit, &best_key_parts,
                               &saved_best_key_parts);

    if (best_key < 0)
    {
      // No usable key has been found
      can_skip_sorting= false;
      goto fix_ICP;
    }

    /*
      filesort() and join cache are usually faster than reading in
      index order and not using join cache. Don't use index scan
      unless:
       - the user specified FORCE INDEX [FOR {GROUP|ORDER} BY] (have to assume
         the user knows what's best)
       - the chosen index is clustered primary key (table scan is not cheaper)
    */
    if (!is_force_index &&
        (select_limit >= table_records) &&
        (tab->type() == JT_ALL &&
         join->primary_tables > join->const_tables + 1) &&
         ((unsigned) best_key != table->s->primary_key ||
          !table->file->primary_key_is_clustered()))
    {
      can_skip_sorting= false;
      goto fix_ICP;
    }

    /*
      Try to build a quick select on best_key, at most once per key
      (remembered in tab->quick_order_tested).
    */
    if (table->quick_keys.is_set(best_key) &&
        !tab->quick_order_tested.is_set(best_key) &&
        best_key != ref_key)
    {
      tab->quick_order_tested.set_bit(best_key);
      Opt_trace_object trace_wrapper(trace);
      Opt_trace_object
        trace_recest(trace, "rows_estimation");
      trace_recest.add_utf8_table(tab->table_ref).
        add_utf8("index", table->key_info[best_key].name);

      key_map keys_to_use;           // Force the creation of quick select
      keys_to_use.set_bit(best_key); // only best_key.
      QUICK_SELECT_I *qck;
      test_quick_select(thd,
                        keys_to_use,
                        0,        // empty table_map
                        join->calc_found_rows ?
                        HA_POS_ERROR :
                        join->unit->select_limit_cnt,
                        true,     // force quick range
                        order->direction, tab, tab->condition(),
                        &tab->needed_reg, &qck, tab->table()->force_index);
      /*
        If tab->quick() pointed to another quick than save_quick, we would
        lose access to it and leak memory.
      */
      assert(tab->quick() == save_quick || tab->quick() == NULL);
      tab->set_quick(qck);
      if (qck && !no_changes)
        tab->set_type(calc_join_type(qck->get_type()));
    }
    order_direction= best_key_direction;
    /*
      saved_best_key_parts is actual number of used keyparts found by the
      test_if_order_by_key function. It could differ from keyinfo->key_parts,
      thus we have to restore it in case of desc order as it affects
      QUICK_SELECT_DESC behaviour.
    */
    used_key_parts= (order_direction == -1) ?
      saved_best_key_parts :  best_key_parts;
    changed_key= best_key;
    // We will use index scan or range scan:
    set_up_ref_access_to_key= false;
  }

  /*
    An ordering index has been chosen; order_direction says in which
    direction it has to be read.
  */
check_reverse_order:
  assert(order_direction != 0);

  if (order_direction == -1)		// If ORDER BY ... DESC
  {
    if (tab->quick())
    {
      /*
	Don't reverse the sort order, if it's already done.
        (In some cases test_if_order_by_key() can be called multiple times.)
      */
      if (tab->quick()->reverse_sorted())
      {
        can_skip_sorting= true;
        goto fix_ICP;
      }

      if (tab->quick()->reverse_sort_possible())
        can_skip_sorting= true;
      else
      {
        can_skip_sorting= false;
        goto fix_ICP;
      }
    }
    else
    {
      // Other index access (ref or scan) poses no problem
      can_skip_sorting= true;
    }
  }
  else
  {
    // ORDER BY ASC poses no problem
    can_skip_sorting= true;
  }

  assert(can_skip_sorting);

  /*
    Update query plan with access pattern for doing
    ordered access according to what we have decided
    above.
  */
  if (!no_changes) // We are allowed to update QEP
  {
    if (set_up_ref_access_to_key)
    {
      /*
        We'll use ref access method on key changed_key. In general case
        the index search tuple for changed_key will be different (e.g.
        when one index is defined as (part1, part2, ...) and another as
        (part1, part2(N), ...) and the WHERE clause contains
        "part1 = const1 AND part2=const2").
        So we build tab->ref() from scratch here.
      */
      Key_use *keyuse= tab->keyuse();
      while (keyuse->key != (uint)changed_key &&
             keyuse->table_ref == tab->table_ref)
        keyuse++;

      if (create_ref_for_key(join, tab, keyuse, tab->prefix_tables()))
      {
        can_skip_sorting= false;
        goto fix_ICP;
      }

      assert(tab->type() != JT_REF_OR_NULL && tab->type() != JT_FT);

      // Changing the key makes filter_effect obsolete
      tab->position()->filter_effect= COND_FILTER_STALE;
    }
    else if (best_key >= 0)
    {
      /*
        If ref_key used index tree reading only ('Using index' in EXPLAIN),
        and best_key doesn't, then revert the decision.
      */
      if(!table->covering_keys.is_set(best_key))
        table->set_keyread(false);
      if (!tab->quick() || tab->quick() == save_quick) // created no QUICK
      {
        // Avoid memory leak:
        assert(tab->quick() == save_quick || tab->quick() == NULL);
        tab->set_quick(NULL);
        tab->set_index(best_key);
        tab->set_type(JT_INDEX_SCAN);       // Read with index_first(), index_next()
        /*
          There is a bug. When we change here, e.g. from group_min_max to
          index scan: loose index scan expected to read a small number of rows
          (jumping through the index), this small number was in
          position()->rows_fetched; index scan will read much more, so
          rows_fetched should be updated. So should the filtering effect.
          It is visible in main.distinct in trunk:
          explain SELECT distinct a from t3 order by a desc limit 2;
          id	select_type	table	partitions	type	possible_keys	key	key_len	ref	rows	filtered	Extra
          1	SIMPLE	t3	NULL	index	a	a	5	NULL	40	25.00	Using index
          "rows=40" should be ~200 i.e. # of records in table. Filter should be
          100.00 (no WHERE).
        */
        table->file->ha_index_or_rnd_end();
        if (thd->lex->is_explain())
        {
          /*
            @todo this neutralizes add_ref_to_table_cond(); as a result
            EXPLAIN shows no "using where" though real SELECT has one.
          */
          tab->ref().key= -1;
          tab->ref().key_parts= 0;
        }
        tab->position()->filter_effect= COND_FILTER_STALE;
      }
      else if (tab->type() != JT_ALL)
      {
        /*
          We're about to use a quick access to the table.
          We need to change the access method so as the quick access
          method is actually used.
        */
        assert(tab->quick());
        assert(tab->quick()->index==(uint)best_key);
        tab->set_type(calc_join_type(tab->quick()->get_type()));
        tab->use_quick=QS_RANGE;
        tab->ref().key= -1;
        tab->ref().key_parts=0;		// Don't use ref key.
        if (tab->quick()->is_loose_index_scan())
          join->tmp_table_param.precomputed_group_by= TRUE;
        tab->position()->filter_effect= COND_FILTER_STALE;
      }
    } // best_key >= 0

    if (order_direction == -1)		// If ORDER BY ... DESC
    {
      if (tab->quick())
      {
        /* ORDER BY range_key DESC */
        QUICK_SELECT_I *tmp= tab->quick()->make_reverse(used_key_parts);
        if (!tmp)
        {
          /* purecov: begin inspected */
          can_skip_sorting= false;      // Reverse sort failed -> filesort
          goto fix_ICP;
          /* purecov: end */
        }
        // save_quick is not deleted here; fix_ICP decides what to do with it
        if (tab->quick() != tmp && tab->quick() != save_quick)
          delete tab->quick();
        tab->set_quick(tmp);
        tab->set_type(calc_join_type(tmp->get_type()));
        tab->position()->filter_effect= COND_FILTER_STALE;
      }
      else if (tab->type() == JT_REF &&
               tab->ref().key_parts <= used_key_parts)
      {
        /*
          SELECT * FROM t1 WHERE a=1 ORDER BY a DESC,b DESC

          Use a traversal function that starts by reading the last row
          with key part (A) and then traverse the index backwards.
        */
        tab->reversed_access= true;

        /*
          The current implementation of join_read_prev_same() does not
          work well in combination with ICP and can lead to increased
          execution time. Setting changed_key to the current key
          (based on that we change the access order for the key) will
          ensure that a pushed index condition will be cancelled.
        */
        changed_key= tab->ref().key;
      }
      else if (tab->type() == JT_INDEX_SCAN)
        tab->reversed_access= true;
    }
    else if (tab->quick())
      tab->quick()->need_sorted_output();

  } // QEP has been modified

fix_ICP:
  /*
    Cleanup:
    We may have both a 'tab->quick()' and 'save_quick' (original)
    at this point. Delete the one that we won't use.
  */
  if (can_skip_sorting && !no_changes)
  {
    // With an index scan and a LIMIT below the row count, read only LIMIT rows
    if (tab->type() == JT_INDEX_SCAN &&
        select_limit < table->file->stats.records)
    {
      tab->position()->rows_fetched= select_limit;
      tab->position()->filter_effect= COND_FILTER_STALE_NO_CONST;
    }

    // Keep current (ordered) tab->quick()
    if (save_quick != tab->quick())
      delete save_quick;
  }
  else
  {
    // Restore original save_quick
    if (tab->quick() != save_quick)
    {
      delete tab->quick();
      tab->set_quick(save_quick);
    }
  }

  // Close the "steps" array and write the summary to the optimizer trace
  trace_steps.end();
  Opt_trace_object
    trace_change_index(trace, "index_order_summary");
  trace_change_index.add_utf8_table(tab->table_ref)
    .add("index_provides_order", can_skip_sorting)
    .add_alnum("order_direction", order_direction == 1 ? "asc" :
               ((order_direction == -1) ? "desc" :
                "undefined"));

  if (changed_key >= 0)
  {
    // switching to another index
    // Should be no pushed conditions at this point
    assert(!table->file->pushed_idx_cond);
    if (unlikely(trace->is_started()))
    {
      trace_change_index.add_utf8("index", table->key_info[changed_key].name);
      trace_change_index.add("plan_changed", !no_changes);
      if (!no_changes)
        trace_change_index.add_alnum("access_type", join_type_str[tab->type()]);
    }
  }
  else if (unlikely(trace->is_started()))
  {
    trace_change_index.add_utf8("index",
                                ref_key >= 0 ?
                                table->key_info[ref_key].name : "unknown");
    trace_change_index.add("plan_changed", false);
  }
  DBUG_RETURN(can_skip_sorting);
}
2480 
2481 
2482 /**
2483   Prune partitions for all tables of a join (query block).
2484 
2485   Requires that tables have been locked.
2486 
2487   @returns false if success, true if error
2488 */
2489 
prune_table_partitions()2490 bool JOIN::prune_table_partitions()
2491 {
2492   assert(select_lex->partitioned_table_count);
2493 
2494   for (TABLE_LIST *tbl= select_lex->leaf_tables; tbl; tbl= tbl->next_leaf)
2495   {
2496     /*
2497       If tbl->embedding!=NULL that means that this table is in the inner
2498       part of the nested outer join, and we can't do partition pruning
2499       (TODO: check if this limitation can be lifted.
2500              This also excludes semi-joins.  Is that intentional?)
2501       This will try to prune non-static conditions, which can
2502       be used after the tables are locked.
2503     */
2504     if (!tbl->embedding)
2505     {
2506       Item *prune_cond= tbl->join_cond_optim() ?
2507                         tbl->join_cond_optim() : where_cond;
2508       if (prune_partitions(thd, tbl->table, prune_cond))
2509         return true;
2510     }
2511   }
2512 
2513   return false;
2514 }
2515 
2516 
2517 /**
2518   A helper function to check whether it's better to use range than ref.
2519 
2520   @details
2521   Heuristic: Switch from 'ref' to 'range' access if 'range'
2522   access can utilize more keyparts than 'ref' access. Conditions
2523   for doing switching:
2524 
2525   1) Range access is possible Or tab->dodgy_ref_cost is set.
2526   2) This function is not relevant for FT, since there is no range access for
2527      that type of index.
2528   3) Used parts of key shouldn't have nullable parts & ref_or_null isn't used.
2529   4) 'ref' access depends on a constant, not a value read from a
2530      table earlier in the join sequence.
2531 
2532      Rationale: if 'ref' depends on a value from another table,
2533      the join condition is not used to limit the rows read by
2534      'range' access (that would require dynamic range - 'Range
2535      checked for each record'). In other words, if 'ref' depends
2536      on a value from another table, we have a query with
2537      conditions of the form
2538 
2539       this_table.idx_col1 = other_table.col AND   <<- used by 'ref'
2540       this_table.idx_col1 OP <const> AND          <<- used by 'range'
2541       this_table.idx_col2 OP <const> AND ...      <<- used by 'range'
2542 
2543      and an index on (idx_col1,idx_col2,...). But the fact that
2544      'range' access uses more keyparts does not mean that it is
2545      more selective than 'ref' access because these access types
2546      utilize different parts of the query condition. We
2547      therefore trust the cost based choice made by
2548      best_access_path() instead of forcing a heuristic choice
2549      here.
2550      5a) 'ref' access and 'range' access uses the same index.
2551      5b) 'range' access uses more keyparts than 'ref' access.
2552 
2553      OR
2554 
2555      6) Ref has borrowed the index estimate from range and created a cost
2556         estimate (See Optimize_table_order::find_best_ref). This will be a
2557         problem if range built it's row estimate using a larger number of key
2558         parts than ref. In such a case, shift to range access over the same
2559         index. So run the range optimizer with that index as the only choice.
2560         (Condition 5 is not relevant here since it has been tested in
2561         find_best_ref.)
2562 
2563   @param thd THD      To re-run range optimizer.
2564   @param tab JOIN_TAB To check the above conditions.
2565 
2566   @return true   Range is better than ref
2567   @return false  Ref is better or switch isn't possible
2568 
2569   @todo: This decision should rather be made in best_access_path()
2570 */
2571 
can_switch_from_ref_to_range(THD * thd,JOIN_TAB * tab)2572 static bool can_switch_from_ref_to_range(THD *thd, JOIN_TAB *tab)
2573 {
2574   if ((tab->quick() || tab->dodgy_ref_cost) &&               // 1)
2575       tab->position()->key->keypart != FT_KEYPART)           // 2)
2576   {
2577     uint keyparts= 0, length= 0;
2578     table_map dep_map= 0;
2579     bool maybe_null= false;
2580 
2581     calc_length_and_keyparts(tab->position()->key, tab,
2582                              tab->position()->key->key,
2583                              tab->prefix_tables(), NULL, &length, &keyparts,
2584                              &dep_map, &maybe_null);
2585     if (maybe_null ||                                        // 3)
2586         dep_map)                                             // 4)
2587       return false;
2588 
2589     if (tab->quick() &&
2590         tab->position()->key->key == tab->quick()->index)    // 5a)
2591       return length < tab->quick()->max_used_key_length;     // 5b)
2592     else if (tab->dodgy_ref_cost)                            // 6)
2593     {
2594       key_map new_ref_key_map;
2595       new_ref_key_map.set_bit(tab->position()->key->key);
2596 
2597       Opt_trace_context * const trace= &thd->opt_trace;
2598       Opt_trace_object trace_wrapper(trace);
2599 
2600       Opt_trace_object
2601         can_switch(trace, "check_if_range_uses_more_keyparts_than_ref");
2602       Opt_trace_object
2603         trace_setup_cond(trace, "rerunning_range_optimizer_for_single_index");
2604 
2605       QUICK_SELECT_I *qck;
2606       if (test_quick_select(thd, new_ref_key_map,
2607                             0,       // empty table_map
2608                             tab->join()->row_limit,
2609                             false,   // don't force quick range
2610                             ORDER::ORDER_NOT_RELEVANT,
2611                             tab,
2612                             tab->join_cond() ? tab->join_cond() :
2613                             tab->join()->where_cond,
2614                             &tab->needed_reg,
2615                             &qck, true) > 0)
2616       {
2617         if (length < qck->max_used_key_length)
2618         {
2619           delete tab->quick();
2620           tab->set_quick(qck);
2621           return true;
2622         }
2623         else
2624         {
2625           Opt_trace_object (trace, "access_type_unchanged").
2626             add("ref_key_length", length).
2627             add("range_key_length", qck->max_used_key_length);
2628           delete qck;
2629         }
2630       }
2631     }
2632   }
2633   return false;
2634 }
2635 
2636 /**
2637  An utility function - apply heuristics and optimize access methods to tables.
2638  Currently this function can change REF to RANGE and ALL to INDEX scan if
2639  latter is considered to be better (not cost-based) than the former.
2640  @note Side effect - this function could set 'Impossible WHERE' zero
2641  result.
2642 */
2643 
adjust_access_methods()2644 void JOIN::adjust_access_methods()
2645 {
2646   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
2647   for (uint i= const_tables; i < tables; i++)
2648   {
2649     JOIN_TAB *const tab= best_ref[i];
2650     TABLE_LIST *const tl= tab->table_ref;
2651 
2652     if (tab->type() == JT_ALL)
2653     {
2654       /*
2655        It's possible to speedup query by switching from full table scan to
2656        the scan of covering index, due to less data being read.
2657        Prerequisites for this are:
2658        1) Keyread (i.e index only scan) is allowed (table isn't updated/deleted
2659          from)
2660        2) Covering indexes are available
2661        3) This isn't a derived table/materialized view
2662       */
2663       if (!tab->table()->no_keyread &&                                //  1
2664           !tab->table()->covering_keys.is_clear_all() &&              //  2
2665           !tl->uses_materialization())                                //  3
2666       {
2667         /*
2668         It has turned out that the change commented out below, while speeding
2669         things up for disk-bound loads, slows them down for cases when the data
2670         is in disk cache (see BUG#35850):
2671         //  See bug #26447: "Using the clustered index for a table scan
2672         //  is always faster than using a secondary index".
2673         if (table->s->primary_key != MAX_KEY &&
2674             table->file->primary_key_is_clustered())
2675           tab->index= table->s->primary_key;
2676         else
2677           tab->index=find_shortest_key(table, & table->covering_keys);
2678         */
2679         if (tab->position()->sj_strategy != SJ_OPT_LOOSE_SCAN)
2680           tab->set_index(find_shortest_key(tab->table(), &tab->table()->covering_keys));
2681         tab->set_type(JT_INDEX_SCAN);      // Read with index_first / index_next
2682         // From table scan to index scan, thus filter effect needs no recalc.
2683       }
2684       else if (!tab->table()->no_keyread && !tl->uses_materialization())
2685       {
2686         assert(tab->table()->covering_keys.is_clear_all());
2687         if (tab->position()->sj_strategy != SJ_OPT_LOOSE_SCAN)
2688         {
2689           key_map clustering_keys;
2690           for (uint i= 0; i < tab->table()->s->keys; i++)
2691           {
2692             if (tab->keys().is_set(i)
2693                 && tab->table()->file->index_flags(i, 0, 0)
2694                 & HA_CLUSTERED_INDEX)
2695               clustering_keys.set_bit(i);
2696           }
2697           uint index= find_shortest_key(tab->table(), &clustering_keys);
2698           if (index != MAX_KEY)
2699           {
2700             tab->set_type(JT_INDEX_SCAN);
2701             tab->set_index(index);
2702           }
2703         }
2704       }
2705     }
2706     else if (tab->type() == JT_REF)
2707     {
2708       if (can_switch_from_ref_to_range(thd, tab))
2709       {
2710         tab->set_type(JT_RANGE);
2711 
2712         Opt_trace_context * const trace= &thd->opt_trace;
2713         Opt_trace_object wrapper(trace);
2714         Opt_trace_object (trace, "access_type_changed").
2715           add_utf8_table(tl).
2716           add_utf8("index",
2717                    tab->table()->key_info[tab->position()->key->key].name).
2718           add_alnum("old_type", "ref").
2719           add_alnum("new_type", join_type_str[tab->type()]).
2720           add_alnum("cause", "uses_more_keyparts");
2721 
2722         tab->use_quick= QS_RANGE;
2723         tab->position()->filter_effect= COND_FILTER_STALE;
2724       }
2725       else
2726       {
2727         // Cleanup quick, REF/REF_OR_NULL/EQ_REF, will be clarified later
2728         delete tab->quick();
2729         tab->set_quick(NULL);
2730       }
2731     }
2732     // Ensure AM consistency
2733     assert(!(tab->quick() && (tab->type() == JT_REF || tab->type() == JT_ALL)));
2734     assert((tab->type() != JT_RANGE && tab->type() != JT_INDEX_MERGE) ||
2735            tab->quick());
2736     if (!tab->const_keys.is_clear_all() &&
2737         tab->table()->reginfo.impossible_range &&
2738         ((i == const_tables && tab->type() == JT_REF) ||
2739          ((tab->type() == JT_ALL || tab->type() == JT_RANGE ||
2740            tab->type() == JT_INDEX_MERGE || tab->type() == JT_INDEX_SCAN) &&
2741            tab->use_quick != QS_RANGE)) &&
2742         !tab->table_ref->is_inner_table_of_outer_join())
2743       zero_result_cause=
2744         "Impossible WHERE noticed after reading const tables";
2745   }
2746 }
2747 
2748 
alloc_jtab_array(THD * thd,uint table_count)2749 static JOIN_TAB *alloc_jtab_array(THD *thd, uint table_count)
2750 {
2751   JOIN_TAB *t= new (thd->mem_root) JOIN_TAB[table_count];
2752   if (!t)
2753     return NULL;                                /* purecov: inspected */
2754 
2755   QEP_shared *qs= new (thd->mem_root) QEP_shared[table_count];
2756   if (!qs)
2757     return NULL;                                /* purecov: inspected */
2758 
2759   for (uint i= 0; i < table_count; ++i)
2760     t[i].set_qs(qs++);
2761 
2762   return t;
2763 }
2764 
2765 
2766 /**
2767   Set up JOIN_TAB structs according to the picked join order in best_positions.
2768   This allocates execution structures so may be called only after we have the
2769   very final plan. It must be called after
2770   Optimize_table_order::fix_semijoin_strategies().
2771 
2772   @return False if success, True if error
2773 
2774   @details
2775     - create join->join_tab array and copy from existing JOIN_TABs in join order
2776     - create helper structs for materialized semi-join handling
2777     - finalize semi-join strategy choices
2778     - Number of intermediate tables "tmp_tables" is calculated.
2779     - "tables" and "primary_tables" are recalculated.
2780     - for full and index scans info of estimated # of records is updated.
2781     - in a helper function:
2782       - all heuristics are applied and the final access method type is picked
2783         for each join_tab (only test_if_skip_sortorder() could override it)
2784       - AM consistency is ensured (e.g only range and index merge are allowed
2785         to have quick select set).
2786       - if "Impossible WHERE" is detected - appropriate zero_result_cause is
2787         set.
2788 
2789    Notice that intermediate tables will not have a POSITION reference; and they
2790    will not have a TABLE reference before the final stages of code generation.
2791 
2792    @todo the block which sets tab->type should move to adjust_access_methods
2793    for unification.
2794 */
2795 
get_best_combination()2796 bool JOIN::get_best_combination()
2797 {
2798   DBUG_ENTER("JOIN::get_best_combination");
2799 
2800   // At this point "tables" and "primary"tables" represent the same:
2801   assert(tables == primary_tables);
2802 
2803   /*
2804     Allocate additional space for tmp tables.
2805     Number of plan nodes:
2806       # of regular input tables (including semi-joined ones) +
2807       # of semi-join nests for materialization +
2808       1? + // For GROUP BY
2809       1? + // For DISTINCT
2810       1? + // For aggregation functions aggregated in outer query
2811            // when used with distinct
2812       1? + // For ORDER BY
2813       1?   // buffer result
2814     Up to 2 tmp tables are actually used, but it's hard to tell exact number
2815     at this stage.
2816   */
2817   uint num_tmp_tables= (group_list ? 1 : 0) +
2818                        (select_distinct ?
2819                         (tmp_table_param.outer_sum_func_count ? 2 : 1) : 0) +
2820                        (order ? 1 : 0) +
2821                        (select_lex->active_options() &
2822                         (SELECT_BIG_RESULT | OPTION_BUFFER_RESULT) ? 1 : 0);
2823   if (num_tmp_tables > 2)
2824     num_tmp_tables= 2;
2825 
2826   /*
2827     Rearrange queries with materialized semi-join nests so that the semi-join
2828     nest is replaced with a reference to a materialized temporary table and all
2829     materialized subquery tables are placed after the intermediate tables.
2830     After the following loop, "inner_target" is the position of the first
2831     subquery table (if any). "outer_target" is the position of first outer
2832     table, and will later be used to track the position of any materialized
2833     temporary tables.
2834   */
2835   const bool has_semijoin= !select_lex->sj_nests.is_empty();
2836   uint outer_target= 0;
2837   uint inner_target= primary_tables + num_tmp_tables;
2838   uint sjm_nests= 0;
2839 
2840   if (has_semijoin)
2841   {
2842     for (uint tableno= 0; tableno < primary_tables; )
2843     {
2844       if (sj_is_materialize_strategy(best_positions[tableno].sj_strategy))
2845       {
2846         sjm_nests++;
2847         inner_target-= (best_positions[tableno].n_sj_tables - 1);
2848         tableno+= best_positions[tableno].n_sj_tables;
2849       }
2850       else
2851         tableno++;
2852     }
2853   }
2854 
2855   JOIN_TAB *tmp_join_tabs= NULL;
2856   if (sjm_nests + num_tmp_tables)
2857   {
2858     // join_tab array only has "primary_tables" tables. We need those more:
2859     if (!(tmp_join_tabs= alloc_jtab_array(thd, sjm_nests + num_tmp_tables)))
2860       DBUG_RETURN(true);                        /* purecov: inspected */
2861   }
2862 
2863   // To check that we fill the array correctly: fill it with zeros first
2864   memset(best_ref, 0, sizeof(JOIN_TAB*) * (primary_tables + sjm_nests +
2865                                            num_tmp_tables));
2866 
2867   int sjm_index= tables;  // Number assigned to materialized temporary table
2868   int remaining_sjm_inner= 0;
2869   bool err= false;
2870   for (uint tableno= 0; tableno < tables; tableno++)
2871   {
2872     POSITION *const pos= best_positions + tableno;
2873     if (has_semijoin && sj_is_materialize_strategy(pos->sj_strategy))
2874     {
2875       assert(outer_target < inner_target);
2876 
2877       TABLE_LIST *const sj_nest= pos->table->emb_sj_nest;
2878 
2879       // Handle this many inner tables of materialized semi-join
2880       remaining_sjm_inner= pos->n_sj_tables;
2881 
2882       /*
2883         If we fail in some allocation below, we cannot bail out immediately;
2884         that would put us in a difficult situation to clean up; imagine we
2885         have planned this layout:
2886           outer1 - sj_mat_tmp1 - outer2 - sj_mat_tmp2 - outer3
2887         We have successfully filled a JOIN_TAB for sj_mat_tmp1, and are
2888         failing to fill a JOIN_TAB for sj_mat_tmp2 (OOM). So we want to quit
2889         this function, which will lead to cleanup functions.
2890         But sj_mat_tmp1 is in this->best_ref only, outer3 is in this->join_tab
2891         only: what is the array to traverse for cleaning up? What is the
2892         number of tables to loop over?
2893         So: if we fail in the present loop, we record the error but continue
2894         filling best_ref; when it's fully filled, bail out, because then
2895         best_ref can be used as reliable array for cleaning up.
2896       */
2897       JOIN_TAB *const tab= tmp_join_tabs++;
2898       best_ref[outer_target]= tab;
2899       tab->set_join(this);
2900       tab->set_idx(outer_target);
2901 
2902       /*
2903         Up to this point there cannot be a failure. JOIN_TAB has been filled
2904         enough to be clean-able.
2905       */
2906 
2907       Semijoin_mat_exec *const sjm_exec=
2908         new (thd->mem_root)
2909         Semijoin_mat_exec(sj_nest,
2910                           (pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN),
2911                           remaining_sjm_inner, outer_target, inner_target);
2912 
2913       tab->set_sj_mat_exec(sjm_exec);
2914 
2915       if (!sjm_exec ||
2916           setup_semijoin_materialized_table(tab, sjm_index,
2917                                             pos, best_positions + sjm_index))
2918         err= true;                              /* purecov: inspected */
2919 
2920       outer_target++;
2921       sjm_index++;
2922     }
2923     /*
2924       Locate join_tab target for the table we are considering.
2925       (remaining_sjm_inner becomes negative for non-SJM tables, this can be
2926        safely ignored).
2927     */
2928     const uint target=
2929       (remaining_sjm_inner--) > 0 ? inner_target++ : outer_target++;
2930     JOIN_TAB *const tab= pos->table;
2931 
2932     best_ref[target]= tab;
2933     tab->set_idx(target);
2934     tab->set_position(pos);
2935     TABLE *const table= tab->table();
2936     if (tab->type() != JT_CONST && tab->type() != JT_SYSTEM)
2937     {
2938       if (pos->sj_strategy == SJ_OPT_LOOSE_SCAN && tab->quick() &&
2939           tab->quick()->index != pos->loosescan_key)
2940       {
2941         /*
2942           We must use the duplicate-eliminating index, so this QUICK is not
2943           an option.
2944         */
2945         delete tab->quick();
2946         tab->set_quick(NULL);
2947       }
2948       if (!pos->key)
2949       {
2950         if (tab->quick())
2951           tab->set_type(calc_join_type(tab->quick()->get_type()));
2952         else
2953           tab->set_type(JT_ALL);
2954       }
2955       else
2956         // REF or RANGE, clarify later when prefix tables are set for JOIN_TABs
2957         tab->set_type(JT_REF);
2958     }
2959     assert(tab->type() != JT_UNKNOWN);
2960 
2961     assert(table->reginfo.join_tab == tab);
2962     if (!tab->join_cond())
2963       table->reginfo.not_exists_optimize= false;     // Only with LEFT JOIN
2964     map2table[tab->table_ref->tableno()]= tab;
2965   }
2966 
2967   // Count the materialized semi-join tables as regular input tables
2968   tables+= sjm_nests + num_tmp_tables;
2969   // Set the number of non-materialized tables:
2970   primary_tables= outer_target;
2971 
2972   /*
2973     Between the last outer table or sj-mat tmp table, and the first sj-mat
2974     inner table, there may be 2 slots for sort/group/etc tmp tables:
2975   */
2976   for (uint i= 0; i < num_tmp_tables; ++i)
2977   {
2978     const uint idx= outer_target + i;
2979     tmp_join_tabs->set_join(this);
2980     tmp_join_tabs->set_idx(idx);
2981     assert(best_ref[idx] == NULL); // verify that not overwriting
2982     best_ref[idx]= tmp_join_tabs++;
2983     /*
2984       note that set_table() cannot be called yet. We may not even use this
2985       JOIN_TAB in the end, it's dummy at the moment. Which can be tested with
2986       "position()!=NULL".
2987     */
2988   }
2989 
2990   // make array unreachable: should walk JOIN_TABs by best_ref now
2991   join_tab= NULL;
2992 
2993   if (err)
2994     DBUG_RETURN(true);                          /* purecov: inspected */
2995 
2996   if (has_semijoin)
2997   {
2998     set_semijoin_info();
2999 
3000     // Update equalities and keyuses after having added SJ materialization
3001     if (update_equalities_for_sjm())
3002       DBUG_RETURN(true);
3003   }
3004   if (!plan_is_const())
3005   {
3006     // Assign map of "available" tables to all tables belonging to query block
3007     set_prefix_tables();
3008     adjust_access_methods();
3009   }
3010   // Calculate outer join info
3011   if (select_lex->outer_join)
3012     make_outerjoin_info();
3013 
3014   // sjm is no longer needed, trash it. To reuse it, reset its members!
3015   List_iterator<TABLE_LIST> sj_list_it(select_lex->sj_nests);
3016   TABLE_LIST *sj_nest;
3017   while ((sj_nest= sj_list_it++))
3018     TRASH(static_cast<void*>(&sj_nest->nested_join->sjm),
3019           sizeof(sj_nest->nested_join->sjm));
3020 
3021   DBUG_RETURN(false);
3022 }
3023 
3024 
3025 /*
3026   Revise usage of join buffer for the specified table and the whole nest
3027 
3028   SYNOPSIS
3029     revise_cache_usage()
3030       tab    join table for which join buffer usage is to be revised
3031 
3032   DESCRIPTION
3033     The function revise the decision to use a join buffer for the table 'tab'.
3034     If this table happened to be among the inner tables of a nested outer join/
3035     semi-join the functions denies usage of join buffers for all of them
3036 
3037   RETURN
3038     none
3039 */
3040 
3041 static
revise_cache_usage(JOIN_TAB * join_tab)3042 void revise_cache_usage(JOIN_TAB *join_tab)
3043 {
3044   plan_idx first_inner= join_tab->first_inner();
3045   JOIN *const join= join_tab->join();
3046   if (first_inner != NO_PLAN_IDX)
3047   {
3048     plan_idx end_tab= join_tab->idx();
3049     for (first_inner= join_tab->first_inner();
3050          first_inner != NO_PLAN_IDX;
3051          first_inner= join->best_ref[first_inner]->first_upper())
3052     {
3053       for (plan_idx i= end_tab-1; i >= first_inner; --i)
3054         join->best_ref[i]->set_use_join_cache(JOIN_CACHE::ALG_NONE);
3055       end_tab= first_inner;
3056     }
3057   }
3058   else if (join_tab->get_sj_strategy() == SJ_OPT_FIRST_MATCH)
3059   {
3060     plan_idx first_sj_inner= join_tab->first_sj_inner();
3061     for (plan_idx i= join_tab->idx()-1; i >= first_sj_inner; --i)
3062     {
3063       JOIN_TAB *tab= join->best_ref[i];
3064       if (tab->first_sj_inner() == first_sj_inner)
3065         tab->set_use_join_cache(JOIN_CACHE::ALG_NONE);
3066     }
3067   }
3068   else
3069     join_tab->set_use_join_cache(JOIN_CACHE::ALG_NONE);
3070   assert(join->qep_tab == NULL);
3071 }
3072 
3073 
3074 /**
3075   Set up join buffering for a specified table, if possible.
3076 
3077   @param tab             joined table to check join buffer usage for
3078   @param join            join for which the check is performed
3079   @param no_jbuf_after   don't use join buffering after table with this number
3080 
3081   @return false if successful, true if error.
3082           Currently, allocation errors for join cache objects are ignored,
3083           and regular execution is chosen silently.
3084 
3085   @details
3086     The function finds out whether the table 'tab' can be joined using a join
3087     buffer. This check is performed after the best execution plan for 'join'
3088     has been chosen. If the function decides that a join buffer can be employed
3089     then it selects the most appropriate join cache type, which later will
3090     be instantiated by init_join_cache().
3091     If it has already been decided to not use join buffering for this table,
3092     no action is taken.
3093 
3094     Often it is already decided that join buffering will be used earlier in
3095     the optimization process, and this will also ensure that the most correct
3096     cost for the operation is calculated, and hence the probability of
3097     choosing an optimal join plan is higher. However, some join buffering
3098     decisions cannot currently be taken before this stage, hence we need this
3099     function to decide the most accurate join buffering strategy.
3100 
3101     @todo Long-term it is the goal that join buffering strategy is decided
3102     when the plan is selected.
3103 
3104     The result of the check and the type of the join buffer to be used
3105     depend on:
3106       - the access method to access rows of the joined table
3107       - whether the join table is an inner table of an outer join or semi-join
3108       - the optimizer_switch settings for join buffering
3109       - the join 'options'.
3110     In any case join buffer is not used if the number of the joined table is
3111     greater than 'no_jbuf_after'.
3112 
3113     If block_nested_loop is turned on, and if all other criteria for using
3114     join buffering is fulfilled (see below), then join buffer is used
3115     for any join operation (inner join, outer join, semi-join) with 'JT_ALL'
3116     access method.  In that case, a JOIN_CACHE_BNL type is always employed.
3117 
3118     If an index is used to access rows of the joined table and batched_key_access
    is on, then a JOIN_CACHE_BKA type is employed. (Unless the debug flag
    test_bka_unique is set, in which case a JOIN_CACHE_BKA_UNIQUE type is
    employed instead.)
3122 
3123     If the function decides that a join buffer can be used to join the table
3124     'tab' then it sets @c tab->use_join_cache to reflect the chosen algorithm.
3125 
3126   @note
3127     For a nested outer join/semi-join, currently, we either use join buffers for
3128     all inner tables or for none of them.
3129 
3130   @todo
3131     Support BKA inside SJ-Materialization nests. When doing this, we'll need
3132     to only store sj-inner tables in the join buffer.
3133 #if 0
3134         JOIN_TAB *first_tab= join->join_tab+join->const_tables;
3135         uint n_tables= i-join->const_tables;
3136         / *
3137           We normally put all preceding tables into the join buffer, except
3138           for the constant tables.
3139           If we're inside a semi-join materialization nest, e.g.
3140 
3141              outer_tbl1  outer_tbl2  ( inner_tbl1, inner_tbl2 ) ...
3142                                                        ^-- we're here
3143 
3144           then we need to put into the join buffer only the tables from
3145           within the nest.
3146         * /
3147         if (i >= first_sjm_table && i < last_sjm_table)
3148         {
3149           n_tables= i - first_sjm_table; // will be >0 if we got here
3150           first_tab= join->join_tab + first_sjm_table;
3151         }
3152 #endif
3153 
3154 */
3155 
setup_join_buffering(JOIN_TAB * tab,JOIN * join,uint no_jbuf_after)3156 static bool setup_join_buffering(JOIN_TAB *tab, JOIN *join, uint no_jbuf_after)
3157 {
3158   ASSERT_BEST_REF_IN_JOIN_ORDER(join);
3159   Cost_estimate cost;
3160   ha_rows rows;
3161   uint bufsz= 4096;
3162   uint join_cache_flags = 0;
3163   const bool bnl_on= hint_table_state(join->thd, tab->table_ref->table,
3164                                       BNL_HINT_ENUM, OPTIMIZER_SWITCH_BNL);
3165   const bool bka_on= hint_table_state(join->thd, tab->table_ref->table,
3166                                       BKA_HINT_ENUM, OPTIMIZER_SWITCH_BKA);
3167 
3168   const uint tableno= tab->idx();
3169   const uint tab_sj_strategy= tab->get_sj_strategy();
3170   bool use_bka_unique= false;
3171   DBUG_EXECUTE_IF("test_bka_unique", use_bka_unique= true;);
3172 
3173   /*
3174     If all key_parts are null_rejecting, the MultiRangeRowIterator will
3175     eliminate all NULL values in the key set, such that
3176     HA_MRR_NO_NULL_ENDPOINTS can be promised.
3177   */
3178   const key_part_map keypart_map = make_prev_keypart_map(tab->ref().key_parts);
3179   if (tab->ref().null_rejecting == keypart_map) {
3180     join_cache_flags |= HA_MRR_NO_NULL_ENDPOINTS;
3181   }
3182 
3183   // Set preliminary join cache setting based on decision from greedy search
3184   tab->set_use_join_cache(tab->position()->use_join_buffer ?
3185                           JOIN_CACHE::ALG_BNL : JOIN_CACHE::ALG_NONE);
3186 
3187   if (tableno == join->const_tables)
3188   {
3189     assert(tab->use_join_cache() == JOIN_CACHE::ALG_NONE);
3190     return false;
3191   }
3192 
3193   if (!(bnl_on || bka_on))
3194     goto no_join_cache;
3195 
3196   /*
3197     psergey-todo: why the below when execution code seems to handle the
3198     "range checked for each record" case?
3199   */
3200   if (tab->use_quick == QS_DYNAMIC_RANGE)
3201     goto no_join_cache;
3202 
3203   /* No join buffering if prevented by no_jbuf_after */
3204   if (tableno > no_jbuf_after)
3205     goto no_join_cache;
3206 
3207   /*
3208     An inner table of an outer join nest must not use join buffering if
3209     the first inner table of that outer join nest does not use join buffering.
3210     This condition is not handled by earlier optimizer stages.
3211   */
3212   if (tab->first_inner() != NO_PLAN_IDX &&
3213       tab->first_inner() != tab->idx() &&
3214       !join->best_ref[tab->first_inner()]->use_join_cache())
3215     goto no_join_cache;
3216   /*
3217     The first inner table of an outer join nest must not use join buffering
3218     if the tables in the embedding outer join nest do not use join buffering.
3219     This condition is not handled by earlier optimizer stages.
3220   */
3221   if (tab->first_upper() != NO_PLAN_IDX &&
3222       !join->best_ref[tab->first_upper()]->use_join_cache())
3223     goto no_join_cache;
3224 
3225   switch (tab_sj_strategy)
3226   {
3227   case SJ_OPT_FIRST_MATCH:
3228     /*
3229       Use join cache with FirstMatch semi-join strategy only when semi-join
3230       contains only one table.
3231     */
3232     if (!tab->is_single_inner_of_semi_join())
3233     {
3234       assert(tab->use_join_cache() == JOIN_CACHE::ALG_NONE);
3235       goto no_join_cache;
3236     }
3237     break;
3238 
3239   case SJ_OPT_LOOSE_SCAN:
3240     /* No join buffering if this semijoin nest is handled by loosescan */
3241     assert(tab->use_join_cache() == JOIN_CACHE::ALG_NONE);
3242     goto no_join_cache;
3243 
3244   case SJ_OPT_MATERIALIZE_LOOKUP:
3245   case SJ_OPT_MATERIALIZE_SCAN:
3246     /*
3247       The Materialize strategies reuse the join_tab belonging to the
3248       first table that was materialized. Neither table can use join buffering:
3249       - The first table in a join never uses join buffering.
3250       - The join_tab used for looking up a row in the materialized table, or
3251         scanning the rows of a materialized table, cannot use join buffering.
3252       We allow join buffering for the remaining tables of the materialized
3253       semi-join nest.
3254     */
3255     if (tab->first_sj_inner() == tab->idx())
3256     {
3257       assert(tab->use_join_cache() == JOIN_CACHE::ALG_NONE);
3258       goto no_join_cache;
3259     }
3260     break;
3261 
3262   case SJ_OPT_DUPS_WEEDOUT:
3263     // This strategy allows the same join buffering as a regular join would.
3264   case SJ_OPT_NONE:
3265     break;
3266   }
3267 
3268   /*
3269     The following code prevents use of join buffering when there is an
3270     outer join operation and first match semi-join strategy is used, because:
3271 
3272     Outer join needs a "match flag" to track that a row should be
3273     NULL-complemented, such flag being attached to first inner table's cache
3274     (tracks whether the cached row from outer table got a match, in which case
3275     no NULL-complemented row is needed).
3276 
3277     FirstMatch also needs a "match flag", such flag is attached to sj inner
3278     table's cache (tracks whether the cached row from outer table already got
3279     a first match in the sj-inner table, in which case we don't need to join
3280     this cached row again)
3281      - but a row in a cache has only one "match flag"
3282      - so if "sj inner table"=="first inner", there is a problem.
3283   */
3284   if (tab_sj_strategy == SJ_OPT_FIRST_MATCH &&
3285       tab->is_inner_table_of_outer_join())
3286     goto no_join_cache;
3287 
3288   switch (tab->type()) {
3289   case JT_ALL:
3290   case JT_INDEX_SCAN:
3291   case JT_RANGE:
3292   case JT_INDEX_MERGE:
3293     if (!bnl_on)
3294     {
3295       assert(tab->use_join_cache() == JOIN_CACHE::ALG_NONE);
3296       goto no_join_cache;
3297     }
3298 
3299     tab->set_use_join_cache(JOIN_CACHE::ALG_BNL);
3300     return false;
3301   case JT_SYSTEM:
3302   case JT_CONST:
3303   case JT_REF:
3304   case JT_EQ_REF:
3305     if (!bka_on)
3306     {
3307       assert(tab->use_join_cache() == JOIN_CACHE::ALG_NONE);
3308       goto no_join_cache;
3309     }
3310 
3311     /*
3312       Disable BKA for materializable derived tables/views as they aren't
3313       instantiated yet.
3314     */
3315     if (tab->table_ref->uses_materialization())
3316       goto no_join_cache;
3317 
3318     /*
3319       Can't use BKA for subquery if dealing with a subquery that can
3320       turn a ref access into a "full scan on NULL key" table scan.
3321 
3322       @see Item_in_optimizer::val_int()
3323       @see subselect_single_select_engine::exec()
3324       @see TABLE_REF::cond_guards
3325       @see push_index_cond()
3326 
3327       @todo: This choice to not use BKA should be done before making
3328       cost estimates, e.g. in set_join_buffer_properties(). That
3329       happens before cond guards are set up, so instead of doing the
3330       check below, BKA should be disabled if
3331        - We are in an IN subquery, and
3332        - The IN predicate is not a top_level_item, and
3333        - The left_expr of the IN predicate may contain NULL values
3334          (left_expr->maybe_null)
3335     */
3336     if (tab->has_guarded_conds())
3337       goto no_join_cache;
3338 
3339     if (tab->table()->covering_keys.is_set(tab->ref().key))
3340       join_cache_flags|= HA_MRR_INDEX_ONLY;
3341     rows= tab->table()->file->multi_range_read_info(tab->ref().key, 10, 20,
3342                                                   &bufsz,
3343                                                   &join_cache_flags, &cost);
3344     /*
3345       Cannot use BKA/BKA_UNIQUE if
3346       1. MRR scan cannot be performed, or
3347       2. MRR default implementation is used
3348       Cannot use BKA if
3349       3. HA_MRR_NO_ASSOCIATION flag is set
3350     */
3351     if ((rows == HA_POS_ERROR) ||                               // 1
3352         (join_cache_flags & HA_MRR_USE_DEFAULT_IMPL) ||    // 2
3353         ((join_cache_flags & HA_MRR_NO_ASSOCIATION) &&     // 3
3354          !use_bka_unique))
3355       goto no_join_cache;
3356 
3357     if (use_bka_unique)
3358       tab->set_use_join_cache(JOIN_CACHE::ALG_BKA_UNIQUE);
3359     else
3360       tab->set_use_join_cache(JOIN_CACHE::ALG_BKA);
3361 
3362     tab->join_cache_flags= join_cache_flags;
3363     return false;
3364   default : ;
3365   }
3366 
3367 no_join_cache:
3368   revise_cache_usage(tab);
3369   tab->set_use_join_cache(JOIN_CACHE::ALG_NONE);
3370   return false;
3371 }
3372 
3373 
3374 /*****************************************************************************
3375   Make some simple condition optimization:
3376   If there is a test 'field = const' change all refs to 'field' to 'const'
3377   Remove all dummy tests 'item = item', 'const op const'.
3378   Remove all 'item is NULL', when item can never be null!
3379   item->marker should be 0 for all items on entry
3380   Return in cond_value FALSE if condition is impossible (1 = 2)
3381 *****************************************************************************/
3382 
3383 class COND_CMP :public ilink<COND_CMP> {
3384 public:
operator new(size_t size)3385   static void *operator new(size_t size)
3386   {
3387     return sql_alloc(size);
3388   }
operator delete(void * ptr MY_ATTRIBUTE ((unused)),size_t size MY_ATTRIBUTE ((unused)))3389   static void operator delete(void *ptr MY_ATTRIBUTE((unused)),
3390                               size_t size MY_ATTRIBUTE((unused)))
3391   { TRASH(ptr, size); }
3392 
3393   Item *and_level;
3394   Item_func *cmp_func;
COND_CMP(Item * a,Item_func * b)3395   COND_CMP(Item *a,Item_func *b) :and_level(a),cmp_func(b) {}
3396 };
3397 
3398 
3399 /**
3400   Find the multiple equality predicate containing a field.
3401 
3402   The function retrieves the multiple equalities accessed through
3403   the cond_equal structure from current level and up looking for
3404   an equality containing a field. It stops retrieval as soon as the equality
3405   is found and set up inherited_fl to TRUE if it's found on upper levels.
3406 
3407   @param cond_equal          multiple equalities to search in
3408   @param item_field          field to look for
3409   @param[out] inherited_fl   set up to TRUE if multiple equality is found
3410                              on upper levels (not on current level of
3411                              cond_equal)
3412 
3413   @return
3414     - Item_equal for the found multiple equality predicate if a success;
3415     - NULL otherwise.
3416 */
3417 
find_item_equal(COND_EQUAL * cond_equal,Item_field * item_field,bool * inherited_fl)3418 Item_equal *find_item_equal(COND_EQUAL *cond_equal, Item_field *item_field,
3419                             bool *inherited_fl)
3420 {
3421   Item_equal *item= 0;
3422   bool in_upper_level= FALSE;
3423   while (cond_equal)
3424   {
3425     List_iterator_fast<Item_equal> li(cond_equal->current_level);
3426     while ((item= li++))
3427     {
3428       if (item->contains(item_field->field))
3429         goto finish;
3430     }
3431     in_upper_level= TRUE;
3432     cond_equal= cond_equal->upper_levels;
3433   }
3434   in_upper_level= FALSE;
3435 finish:
3436   *inherited_fl= in_upper_level;
3437   return item;
3438 }
3439 
3440 
3441 /**
3442   Get the best field substitution for a given field.
3443 
3444   If the field is member of a multiple equality, look up that equality
3445   and return the most appropriate field. Usually this is the equivalenced
3446   field belonging to the outer-most table in the join order, but
3447   @see Item_field::get_subst_item() for details.
3448   Otherwise, return the same field.
3449 
3450   @param item_field The field that we are seeking a substitution for.
3451   @param cond_equal multiple equalities to search in
3452 
3453   @return The substituted field.
3454 */
3455 
get_best_field(Item_field * item_field,COND_EQUAL * cond_equal)3456 Item_field *get_best_field(Item_field *item_field, COND_EQUAL *cond_equal)
3457 {
3458   bool dummy;
3459   Item_equal *item_eq= find_item_equal(cond_equal, item_field, &dummy);
3460   if (!item_eq)
3461     return item_field;
3462 
3463   return item_eq->get_subst_item(item_field);
3464 }
3465 
3466 
3467 /**
3468   Check whether an equality can be used to build multiple equalities.
3469 
3470     This function first checks whether the equality (left_item=right_item)
3471     is a simple equality i.e. one that equates a field with another field
3472     or a constant (field=field_item or field=const_item).
3473     If this is the case the function looks for a multiple equality
3474     in the lists referenced directly or indirectly by cond_equal inferring
3475     the given simple equality. If it doesn't find any, it builds a multiple
3476     equality that covers the predicate, i.e. the predicate can be inferred
3477     from this multiple equality.
3478     The built multiple equality could be obtained in such a way:
3479     create a binary  multiple equality equivalent to the predicate, then
3480     merge it, if possible, with one of old multiple equalities.
3481     This guarantees that the set of multiple equalities covering equality
3482     predicates will be minimal.
3483 
3484   EXAMPLE:
3485     For the where condition
3486     @code
3487       WHERE a=b AND b=c AND
3488             (b=2 OR f=e)
3489     @endcode
3490     the check_equality will be called for the following equality
3491     predicates a=b, b=c, b=2 and f=e.
3492     - For a=b it will be called with *cond_equal=(0,[]) and will transform
3493       *cond_equal into (0,[Item_equal(a,b)]).
3494     - For b=c it will be called with *cond_equal=(0,[Item_equal(a,b)])
3495       and will transform *cond_equal into CE=(0,[Item_equal(a,b,c)]).
3496     - For b=2 it will be called with *cond_equal=(ptr(CE),[])
3497       and will transform *cond_equal into (ptr(CE),[Item_equal(2,a,b,c)]).
3498     - For f=e it will be called with *cond_equal=(ptr(CE), [])
3499       and will transform *cond_equal into (ptr(CE),[Item_equal(f,e)]).
3500 
3501   @note
3502     Now only fields that have the same type definitions (verified by
3503     the Field::eq_def method) are placed to the same multiple equalities.
3504     Because of this some equality predicates are not eliminated and
3505     can be used in the constant propagation procedure.
3506     We could weaken the equality test as soon as at least one of the
3507     equal fields is to be equal to a constant. It would require a
3508     more complicated implementation: we would have to store, in
3509     general case, its own constant for each fields from the multiple
3510     equality. But at the same time it would allow us to get rid
3511     of constant propagation completely: it would be done by the call
3512     to build_equal_items_for_cond.
3513 
3514     The implementation does not follow exactly the above rules to
3515     build a new multiple equality for the equality predicate.
3516     If it processes the equality of the form field1=field2, it
3517     looks for multiple equalities me1 containing field1 and me2 containing
3518     field2. If only one of them is found the function expands it with
3519     the lacking field. If multiple equalities for both fields are
3520     found they are merged. If both searches fail a new multiple equality
3521     containing just field1 and field2 is added to the existing
3522     multiple equalities.
3523     If the function processes the predicate of the form field1=const,
3524     it looks for a multiple equality containing field1. If found, the
3525     function checks the constant of the multiple equality. If the value
3526     is unknown, it is setup to const. Otherwise the value is compared with
3527     const and the evaluation of the equality predicate is performed.
3528     When expanding/merging equality predicates from the upper levels
3529     the function first copies them for the current level. It looks
3530     acceptable, as this happens rarely. The implementation without
3531     copying would be much more complicated.
3532 
3533   @param thd         Thread handler
3534   @param left_item   left term of the equality to be checked
3535   @param right_item  right term of the equality to be checked
3536   @param item        equality item if the equality originates from a condition
3537                      predicate, 0 if the equality is the result of row
3538                      elimination
3539   @param cond_equal  multiple equalities that must hold together with the
3540                      equality
3541   @param[out] simple_equality
3542                      true  if the predicate is a simple equality predicate
3543                            to be used for building multiple equalities
3544                      false otherwise
3545 
3546   @returns false if success, true if error
3547 */
3548 
check_simple_equality(THD * thd,Item * left_item,Item * right_item,Item * item,COND_EQUAL * cond_equal,bool * simple_equality)3549 static bool check_simple_equality(THD *thd,
3550                                   Item *left_item, Item *right_item,
3551                                   Item *item, COND_EQUAL *cond_equal,
3552                                   bool *simple_equality)
3553 {
3554   *simple_equality= false;
3555 
3556   if (left_item->type() == Item::REF_ITEM &&
3557       down_cast<Item_ref *>(left_item)->ref_type() == Item_ref::VIEW_REF)
3558   {
3559     if (down_cast<Item_ref *>(left_item)->depended_from)
3560       return false;
3561     left_item= left_item->real_item();
3562   }
3563   if (right_item->type() == Item::REF_ITEM &&
3564       down_cast<Item_ref *>(right_item)->ref_type() == Item_ref::VIEW_REF)
3565   {
3566     if (down_cast<Item_ref *>(right_item)->depended_from)
3567       return false;
3568     right_item= right_item->real_item();
3569   }
3570   Item_field *left_item_field, *right_item_field;
3571 
3572   if (left_item->type() == Item::FIELD_ITEM &&
3573       right_item->type() == Item::FIELD_ITEM &&
3574       (left_item_field= down_cast<Item_field *>(left_item)) &&
3575       (right_item_field= down_cast<Item_field *>(right_item)) &&
3576       !left_item_field->depended_from &&
3577       !right_item_field->depended_from)
3578   {
3579     /* The predicate the form field1=field2 is processed */
3580 
3581     Field *const left_field= left_item_field->field;
3582     Field *const right_field= right_item_field->field;
3583 
3584     if (!left_field->eq_def(right_field))
3585       return false;
3586 
3587     /* Search for multiple equalities containing field1 and/or field2 */
3588     bool left_copyfl, right_copyfl;
3589     Item_equal *left_item_equal=
3590                find_item_equal(cond_equal, left_item_field, &left_copyfl);
3591     Item_equal *right_item_equal=
3592                find_item_equal(cond_equal, right_item_field, &right_copyfl);
3593 
3594     /* As (NULL=NULL) != TRUE we can't just remove the predicate f=f */
3595     if (left_field->eq(right_field)) /* f = f */
3596     {
3597       *simple_equality= !(left_field->maybe_null() && !left_item_equal);
3598       return false;
3599     }
3600 
3601     if (left_item_equal && left_item_equal == right_item_equal)
3602     {
3603       /*
3604         The equality predicate is inference of one of the existing
3605         multiple equalities, i.e the condition is already covered
3606         by upper level equalities
3607       */
3608        *simple_equality= true;
3609        return false;
3610     }
3611 
3612     /* Copy the found multiple equalities at the current level if needed */
3613     if (left_copyfl)
3614     {
3615       /* left_item_equal of an upper level contains left_item */
3616       left_item_equal= new Item_equal(left_item_equal);
3617       if (left_item_equal == NULL)
3618         return true;
3619       cond_equal->current_level.push_back(left_item_equal);
3620     }
3621     if (right_copyfl)
3622     {
3623       /* right_item_equal of an upper level contains right_item */
3624       right_item_equal= new Item_equal(right_item_equal);
3625       if (right_item_equal == NULL)
3626         return true;
3627       cond_equal->current_level.push_back(right_item_equal);
3628     }
3629 
3630     if (left_item_equal)
3631     {
3632       /* left item was found in the current or one of the upper levels */
3633       if (! right_item_equal)
3634         left_item_equal->add(down_cast<Item_field *>(right_item));
3635       else
3636       {
3637         /* Merge two multiple equalities forming a new one */
3638         if (left_item_equal->merge(thd, right_item_equal))
3639           return true;
3640         /* Remove the merged multiple equality from the list */
3641         List_iterator<Item_equal> li(cond_equal->current_level);
3642         while ((li++) != right_item_equal) ;
3643         li.remove();
3644       }
3645     }
3646     else
3647     {
3648       /* left item was not found neither the current nor in upper levels  */
3649       if (right_item_equal)
3650       {
3651         right_item_equal->add(down_cast<Item_field *>(left_item));
3652       }
3653       else
3654       {
3655         /* None of the fields was found in multiple equalities */
3656         Item_equal *item_equal=
3657           new Item_equal(down_cast<Item_field *>(left_item),
3658                          down_cast<Item_field *>(right_item));
3659         if (item_equal == NULL)
3660           return true;
3661         cond_equal->current_level.push_back(item_equal);
3662       }
3663     }
3664     *simple_equality= true;
3665     return false;
3666   }
3667 
3668   {
3669     /* The predicate of the form field=const/const=field is processed */
3670     Item *const_item= 0;
3671     Item_field *field_item= 0;
3672     if (left_item->type() == Item::FIELD_ITEM &&
3673         (field_item= down_cast<Item_field *>(left_item)) &&
3674         field_item->depended_from == NULL &&
3675         right_item->const_item())
3676     {
3677       const_item= right_item;
3678     }
3679     else if (right_item->type() == Item::FIELD_ITEM &&
3680              (field_item= down_cast<Item_field *>(right_item)) &&
3681              field_item->depended_from == NULL &&
3682              left_item->const_item())
3683     {
3684       const_item= left_item;
3685     }
3686 
3687     if (const_item &&
3688         field_item->result_type() == const_item->result_type())
3689     {
3690       if (field_item->result_type() == STRING_RESULT)
3691       {
3692         const CHARSET_INFO *cs= field_item->field->charset();
3693         if (!item)
3694         {
3695           Item_func_eq *const eq_item= new Item_func_eq(left_item, right_item);
3696           if (eq_item == NULL || eq_item->set_cmp_func())
3697             return true;
3698           eq_item->quick_fix_field();
3699           item= eq_item;
3700         }
3701         if ((cs != down_cast<Item_func *>(item)->compare_collation()) ||
3702             !cs->coll->propagate(cs, 0, 0))
3703           return false;
3704       }
3705 
3706       bool copyfl;
3707       Item_equal *item_equal= find_item_equal(cond_equal, field_item, &copyfl);
3708       if (copyfl)
3709       {
3710         item_equal= new Item_equal(item_equal);
3711         if (item_equal == NULL)
3712           return true;
3713         cond_equal->current_level.push_back(item_equal);
3714       }
3715       if (item_equal)
3716       {
3717         /*
3718           The flag cond_false will be set to 1 after this, if item_equal
3719           already contains a constant and its value is  not equal to
3720           the value of const_item.
3721         */
3722         if (item_equal->add(thd, const_item, field_item))
3723           return true;
3724       }
3725       else
3726       {
3727         item_equal= new Item_equal(const_item, field_item);
3728         if (item_equal == NULL)
3729           return true;
3730         cond_equal->current_level.push_back(item_equal);
3731       }
3732       *simple_equality= true;
3733       return false;
3734     }
3735   }
3736   return false;
3737 }
3738 
3739 
3740 /**
3741   Convert row equalities into a conjunction of regular equalities.
3742 
3743     The function converts a row equality of the form (E1,...,En)=(E'1,...,E'n)
3744     into a list of equalities E1=E'1,...,En=E'n. For each of these equalities
3745     Ei=E'i the function checks whether it is a simple equality or a row
3746     equality. If it is a simple equality it is used to expand multiple
    equalities of cond_equal. If it is a row equality it is converted to a
3748     sequence of equalities between row elements. If Ei=E'i is neither a
3749     simple equality nor a row equality the item for this predicate is added
3750     to eq_list.
3751 
3752   @param thd        thread handle
3753   @param left_row   left term of the row equality to be processed
3754   @param right_row  right term of the row equality to be processed
3755   @param cond_equal multiple equalities that must hold together with the
3756                     predicate
3757   @param eq_list    results of conversions of row equalities that are not
3758                     simple enough to form multiple equalities
  @param[out] simple_equality
                    true if the whole row equality was converted; elements
                    that are not simple equalities are appended to eq_list
                    as regular equalities. false if an error occurred.
3762 
3763   @returns false if conversion succeeded, true if any error.
3764 */
3765 
check_row_equality(THD * thd,Item * left_row,Item_row * right_row,COND_EQUAL * cond_equal,List<Item> * eq_list,bool * simple_equality)3766 static bool check_row_equality(THD *thd, Item *left_row, Item_row *right_row,
3767                                COND_EQUAL *cond_equal, List<Item>* eq_list,
3768                                bool *simple_equality)
3769 {
3770   *simple_equality= false;
3771   uint n= left_row->cols();
3772   for (uint i= 0 ; i < n; i++)
3773   {
3774     bool is_converted;
3775     Item *left_item= left_row->element_index(i);
3776     Item *right_item= right_row->element_index(i);
3777     if (left_item->type() == Item::ROW_ITEM &&
3778         right_item->type() == Item::ROW_ITEM)
3779     {
3780       if (check_row_equality(thd,
3781                              down_cast<Item_row *>(left_item),
3782                              down_cast<Item_row *>(right_item),
3783                              cond_equal, eq_list, &is_converted))
3784         return true;
3785       if (!is_converted)
3786         thd->lex->current_select()->cond_count++;
3787     }
3788     else
3789     {
3790       if (check_simple_equality(thd, left_item, right_item, 0, cond_equal,
3791                                 &is_converted))
3792         return true;
3793       thd->lex->current_select()->cond_count++;
3794     }
3795 
3796     if (!is_converted)
3797     {
3798       Item_func_eq *const eq_item= new Item_func_eq(left_item, right_item);
3799       if (eq_item == NULL)
3800         return true;
3801       if (eq_item->set_cmp_func())
3802       {
3803         // Failed to create cmp func -> not only simple equalitities
3804         return true;
3805       }
3806       eq_item->quick_fix_field();
3807       eq_list->push_back(eq_item);
3808     }
3809   }
3810   *simple_equality= true;
3811   return false;
3812 }
3813 
3814 
3815 /**
3816   Eliminate row equalities and form multiple equalities predicates.
3817 
3818     This function checks whether the item is a simple equality
3819     i.e. the one that equates a field with another field or a constant
3820     (field=field_item or field=constant_item), or, a row equality.
3821     For a simple equality the function looks for a multiple equality
3822     in the lists referenced directly or indirectly by cond_equal inferring
3823     the given simple equality. If it doesn't find any, it builds/expands
3824     multiple equality that covers the predicate.
    Row equalities are eliminated by replacing them with conjunctions of
    regular equalities, which are treated in the same way as the original
    equality predicates.
3828 
3829   @param thd        thread handle
3830   @param item       predicate to process
3831   @param cond_equal multiple equalities that must hold together with the
3832                     predicate
3833   @param eq_list    results of conversions of row equalities that are not
3834                     simple enough to form multiple equalities
3835   @param[out] equality
3836                     true if re-writing rules have been applied
3837                     false otherwise, i.e.
3838                       if the predicate is not an equality, or
3839                       if the equality is neither a simple nor a row equality
3840 
3841   @returns false if success, true if error
3842 
3843   @note If the equality was created by IN->EXISTS, it may be removed later by
3844   subquery materialization. So we don't mix this possibly temporary equality
3845   with others; if we let it go into a multiple-equality (Item_equal), then we
3846   could not remove it later. There is however an exception: if the outer
3847   expression is a constant, it is safe to leave the equality even in
3848   materialization; all it can do is preventing NULL/FALSE distinction but if
3849   such distinction mattered the equality would be in a triggered condition so
3850   we would not come to this function. And injecting constants is good because
3851   it makes the materialized table smaller.
3852 */
3853 
check_equality(THD * thd,Item * item,COND_EQUAL * cond_equal,List<Item> * eq_list,bool * equality)3854 static bool check_equality(THD *thd, Item *item, COND_EQUAL *cond_equal,
3855                            List<Item> *eq_list, bool *equality)
3856 {
3857   *equality= false;
3858   Item_func *item_func;
3859   if (item->type() == Item::FUNC_ITEM &&
3860       (item_func= down_cast<Item_func *>(item))->functype() ==
3861       Item_func::EQ_FUNC)
3862   {
3863     Item *left_item= item_func->arguments()[0];
3864     Item *right_item= item_func->arguments()[1];
3865 
3866     if (item->created_by_in2exists() && !left_item->const_item())
3867       return false;                             // See note above
3868 
3869     if (left_item->type() == Item::ROW_ITEM &&
3870         right_item->type() == Item::ROW_ITEM)
3871     {
3872       thd->lex->current_select()->cond_count--;
3873       return check_row_equality(thd,
3874                                 down_cast<Item_row *>(left_item),
3875                                 down_cast<Item_row *>(right_item),
3876                                 cond_equal, eq_list, equality);
3877     }
3878     else
3879       return check_simple_equality(thd, left_item, right_item, item, cond_equal,
3880                                    equality);
3881   }
3882 
3883   return false;
3884 }
3885 
3886 
3887 /**
3888   Replace all equality predicates in a condition by multiple equality items.
3889 
3890     At each 'and' level the function detects items for equality predicates
3891     and replaces them by a set of multiple equality items of class Item_equal,
3892     taking into account inherited equalities from upper levels.
3893     If an equality predicate is used not in a conjunction it's just
3894     replaced by a multiple equality predicate.
3895     For each 'and' level the function set a pointer to the inherited
3896     multiple equalities in the cond_equal field of the associated
3897     object of the type Item_cond_and.
3898     The function also traverses the cond tree and for each field reference
3899     sets a pointer to the multiple equality item containing the field, if there
3900     is any. If this multiple equality equates fields to a constant the
3901     function replaces the field reference by the constant in the cases
3902     when the field is not of a string type or when the field reference is
3903     just an argument of a comparison predicate.
3904     The function also determines the maximum number of members in
3905     equality lists of each Item_cond_and object assigning it to
3906     thd->lex->current_select()->max_equal_elems.
3907 
3908   @note
    Multiple equality predicate =(f1,..fn) is equivalent to the conjunction of
3910     f1=f2, .., fn-1=fn. It substitutes any inference from these
3911     equality predicates that is equivalent to the conjunction.
3912     Thus, =(a1,a2,a3) can substitute for ((a1=a3) AND (a2=a3) AND (a2=a1)) as
3913     it is equivalent to ((a1=a2) AND (a2=a3)).
    The function always substitutes all equality predicates occurring in a
    conjunction with a minimal set of multiple equality predicates.
3916     This set can be considered as a canonical representation of the
3917     sub-conjunction of the equality predicates.
3918     E.g. (t1.a=t2.b AND t2.b>5 AND t1.a=t3.c) is replaced by
3919     (=(t1.a,t2.b,t3.c) AND t2.b>5), not by
3920     (=(t1.a,t2.b) AND =(t1.a,t3.c) AND t2.b>5);
3921     while (t1.a=t2.b AND t2.b>5 AND t3.c=t4.d) is replaced by
    (=(t1.a,t2.b) AND =(t3.c,t4.d) AND t2.b>5),
3923     but if additionally =(t4.d,t2.b) is inherited, it
3924     will be replaced by (=(t1.a,t2.b,t3.c,t4.d) AND t2.b>5)
3925 
3926     The function performs the substitution in a recursive descent of
3927     the condition tree, passing to the next AND level a chain of multiple
3928     equality predicates which have been built at the upper levels.
3929     The Item_equal items built at the level are attached to other
3930     non-equality conjuncts as a sublist. The pointer to the inherited
3931     multiple equalities is saved in the and condition object (Item_cond_and).
    This chain allows us for any field reference occurrence to easily find a
    multiple equality that must be held for this occurrence.
3934     For each AND level we do the following:
3935     - scan it for all equality predicate (=) items
3936     - join them into disjoint Item_equal() groups
3937     - process the included OR conditions recursively to do the same for
3938       lower AND levels.
3939 
3940     We need to do things in this order as lower AND levels need to know about
3941     all possible Item_equal objects in upper levels.
3942 
3943   @param thd          thread handle
3944   @param cond         condition(expression) where to make replacement
3945   @param[out] retcond returned condition
3946   @param inherited    path to all inherited multiple equality items
3947   @param do_inherit   whether or not to inherit equalities from other parts
3948                       of the condition
3949 
3950   @returns false if success, true if error
3951 */
3952 
build_equal_items_for_cond(THD * thd,Item * cond,Item ** retcond,COND_EQUAL * inherited,bool do_inherit)3953 static bool build_equal_items_for_cond(THD *thd, Item *cond, Item **retcond,
3954                                        COND_EQUAL *inherited, bool do_inherit)
3955 {
3956   Item_equal *item_equal;
3957   COND_EQUAL cond_equal;
3958   cond_equal.upper_levels= inherited;
3959 
3960   if (check_stack_overrun(thd, STACK_MIN_SIZE, NULL))
3961     return true;                          // Fatal error flag is set!
3962 
3963   const enum Item::Type cond_type= cond->type();
3964   if (cond_type == Item::COND_ITEM)
3965   {
3966     List<Item> eq_list;
3967     Item_cond *const item_cond= down_cast<Item_cond *>(cond);
3968     const bool and_level= item_cond->functype() == Item_func::COND_AND_FUNC;
3969     List<Item> *args= item_cond->argument_list();
3970 
3971     List_iterator<Item> li(*args);
3972     Item *item;
3973 
3974     if (and_level)
3975     {
3976       /*
3977          Retrieve all conjuncts of this level detecting the equality
3978          that are subject to substitution by multiple equality items and
3979          removing each such predicate from the conjunction after having
3980          found/created a multiple equality whose inference the predicate is.
3981      */
3982       while ((item= li++))
3983       {
3984         /*
3985           PS/SP note: we can safely remove a node from AND-OR
3986           structure here because it's restored before each
3987           re-execution of any prepared statement/stored procedure.
3988         */
3989         bool equality;
3990         if (check_equality(thd, item, &cond_equal, &eq_list, &equality))
3991           return true;
3992         if (equality)
3993           li.remove();
3994       }
3995 
3996       /*
3997         Check if we eliminated all the predicates of the level, e.g.
3998         (a=a AND b=b AND a=a).
3999       */
4000       if (!args->elements &&
4001           !cond_equal.current_level.elements &&
4002           !eq_list.elements)
4003       {
4004         *retcond= new Item_int((longlong) 1, 1);
4005         return *retcond == NULL;
4006       }
4007 
4008       List_iterator_fast<Item_equal> it(cond_equal.current_level);
4009       while ((item_equal= it++))
4010       {
4011         item_equal->fix_length_and_dec();
4012         item_equal->update_used_tables();
4013         set_if_bigger(thd->lex->current_select()->max_equal_elems,
4014                       item_equal->members());
4015       }
4016 
4017       Item_cond_and *const item_cond_and= down_cast<Item_cond_and *>(cond);
4018       item_cond_and->cond_equal= cond_equal;
4019       inherited= &item_cond_and->cond_equal;
4020     }
4021     /*
4022        Make replacement of equality predicates for lower levels
4023        of the condition expression.
4024     */
4025     li.rewind();
4026     while ((item= li++))
4027     {
4028       Item *new_item;
4029       if (build_equal_items_for_cond(thd, item, &new_item, inherited,
4030                                      do_inherit))
4031         return true;
4032       if (new_item != item)
4033       {
4034         /* This replacement happens only for standalone equalities */
4035         /*
4036           This is ok with PS/SP as the replacement is done for
4037           arguments of an AND/OR item, which are restored for each
4038           execution of PS/SP.
4039         */
4040         li.replace(new_item);
4041       }
4042     }
4043     if (and_level)
4044     {
4045       args->concat(&eq_list);
4046       args->concat((List<Item> *)&cond_equal.current_level);
4047     }
4048   }
4049   else if (cond->type() == Item::FUNC_ITEM)
4050   {
4051     List<Item> eq_list;
4052     /*
4053       If an equality predicate forms the whole and level,
4054       we call it standalone equality and it's processed here.
4055       E.g. in the following where condition
4056       WHERE a=5 AND (b=5 or a=c)
4057       (b=5) and (a=c) are standalone equalities.
4058       In general we can't leave alone standalone eqalities:
4059       for WHERE a=b AND c=d AND (b=c OR d=5)
4060       b=c is replaced by =(a,b,c,d).
4061      */
4062     bool equality;
4063     if (check_equality(thd, cond, &cond_equal, &eq_list, &equality))
4064       return true;
4065     if (equality)
4066     {
4067       int n= cond_equal.current_level.elements + eq_list.elements;
4068       if (n == 0)
4069       {
4070         *retcond= new Item_int((longlong) 1,1);
4071         return *retcond == NULL;
4072       }
4073       else if (n == 1)
4074       {
4075         if ((item_equal= cond_equal.current_level.pop()))
4076         {
4077           item_equal->fix_length_and_dec();
4078           item_equal->update_used_tables();
4079           set_if_bigger(thd->lex->current_select()->max_equal_elems,
4080                         item_equal->members());
4081           *retcond= item_equal;
4082           return false;
4083 	}
4084 
4085         *retcond= eq_list.pop();
4086         return false;
4087       }
4088       else
4089       {
4090         /*
4091           Here a new AND level must be created. It can happen only
4092           when a row equality is processed as a standalone predicate.
4093 	*/
4094         Item_cond_and *and_cond= new Item_cond_and(eq_list);
4095         if (and_cond == NULL)
4096           return true;
4097 
4098         and_cond->quick_fix_field();
4099         List<Item> *args= and_cond->argument_list();
4100         List_iterator_fast<Item_equal> it(cond_equal.current_level);
4101         while ((item_equal= it++))
4102         {
4103           item_equal->fix_length_and_dec();
4104           item_equal->update_used_tables();
4105           set_if_bigger(thd->lex->current_select()->max_equal_elems,
4106                         item_equal->members());
4107         }
4108         and_cond->cond_equal= cond_equal;
4109         args->concat((List<Item> *)&cond_equal.current_level);
4110 
4111         *retcond= and_cond;
4112         return false;
4113       }
4114     }
4115 
4116     if (do_inherit)
4117     {
4118       /*
4119         For each field reference in cond, not from equal item predicates,
4120         set a pointer to the multiple equality it belongs to (if there is any)
4121         as soon the field is not of a string type or the field reference is
4122         an argument of a comparison predicate.
4123       */
4124       uchar *is_subst_valid= (uchar *) 1;
4125       cond= cond->compile(&Item::subst_argument_checker,
4126                           &is_subst_valid,
4127                           &Item::equal_fields_propagator,
4128                           (uchar *) inherited);
4129       if (cond == NULL)
4130         return true;
4131     }
4132     cond->update_used_tables();
4133   }
4134   *retcond= cond;
4135   return false;
4136 }
4137 
4138 
4139 /**
4140   Build multiple equalities for a WHERE condition and all join conditions that
4141   inherit these multiple equalities.
4142 
4143     The function first applies the build_equal_items_for_cond function
4144     to build all multiple equalities for condition cond utilizing equalities
4145     referred through the parameter inherited. The extended set of
4146     equalities is returned in the structure referred by the cond_equal_ref
    parameter. After this the function calls itself recursively for
    all join conditions whose direct references can be found in join_list
    and that directly inherit the multiple equalities that were just built.
4150 
4151   @note
4152     The join condition used in an outer join operation inherits all equalities
4153     from the join condition of the embedding join, if there is any, or
4154     otherwise - from the where condition.
4155     This fact is not obvious, but presumably can be proved.
4156     Consider the following query:
4157     @code
4158       SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t1.a=t3.a AND t2.a=t4.a
4159         WHERE t1.a=t2.a;
4160     @endcode
4161     If the join condition in the query inherits =(t1.a,t2.a), then we
4162     can build the multiple equality =(t1.a,t2.a,t3.a,t4.a) that infers
4163     the equality t3.a=t4.a. Although the join condition
4164     t1.a=t3.a AND t2.a=t4.a AND t3.a=t4.a is not equivalent to the one
4165     in the query the latter can be replaced by the former: the new query
4166     will return the same result set as the original one.
4167 
    Interestingly, the multiple equality =(t1.a,t2.a,t3.a,t4.a) allows us
    to use t1.a=t3.a AND t3.a=t4.a as the join condition:
4170     @code
4171       SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t1.a=t3.a AND t3.a=t4.a
4172         WHERE t1.a=t2.a
4173     @endcode
    This query is equivalent to:
4175     @code
4176       SELECT * FROM (t1 LEFT JOIN (t3,t4) ON t1.a=t3.a AND t3.a=t4.a),t2
4177         WHERE t1.a=t2.a
4178     @endcode
4179     Similarly the original query can be rewritten to the query:
4180     @code
4181       SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t2.a=t4.a AND t3.a=t4.a
4182         WHERE t1.a=t2.a
4183     @endcode
4184     that is equivalent to:
4185     @code
4186       SELECT * FROM (t2 LEFT JOIN (t3,t4)ON t2.a=t4.a AND t3.a=t4.a), t1
4187         WHERE t1.a=t2.a
4188     @endcode
4189     Thus, applying equalities from the where condition we basically
4190     can get more freedom in performing join operations.
4191     Although we don't use this property now, it probably makes sense to use
4192     it in the future.
4193 
4194   @param thd		     Thread handler
4195   @param cond                condition to build the multiple equalities for
4196   @param[out] retcond        Returned condition
4197   @param inherited           path to all inherited multiple equality items
4198   @param do_inherit          whether or not to inherit equalities from other
4199                              parts of the condition
4200   @param join_list           list of join tables that the condition refers to
4201   @param[out] cond_equal_ref pointer to the structure to place built
4202                              equalities in
4203 
4204   @returns false if success, true if error
4205 */
4206 
build_equal_items(THD * thd,Item * cond,Item ** retcond,COND_EQUAL * inherited,bool do_inherit,List<TABLE_LIST> * join_list,COND_EQUAL ** cond_equal_ref)4207 bool build_equal_items(THD *thd, Item *cond, Item **retcond,
4208                        COND_EQUAL *inherited, bool do_inherit,
4209                        List<TABLE_LIST> *join_list,
4210                        COND_EQUAL **cond_equal_ref)
4211 {
4212   COND_EQUAL *cond_equal= 0;
4213 
4214   if (cond)
4215   {
4216     if (build_equal_items_for_cond(thd, cond, &cond, inherited, do_inherit))
4217       return true;
4218     cond->update_used_tables();
4219     const enum Item::Type cond_type= cond->type();
4220     if (cond_type == Item::COND_ITEM &&
4221         down_cast<Item_cond *>(cond)->functype() == Item_func::COND_AND_FUNC)
4222       cond_equal= &down_cast<Item_cond_and *>(cond)->cond_equal;
4223     else if (cond_type == Item::FUNC_ITEM &&
4224          down_cast<Item_func *>(cond)->functype() == Item_func::MULT_EQUAL_FUNC)
4225     {
4226       cond_equal= new COND_EQUAL;
4227       if (cond_equal == NULL)
4228         return true;
4229       cond_equal->current_level.push_back(down_cast<Item_equal *>(cond));
4230     }
4231   }
4232   if (cond_equal)
4233   {
4234     cond_equal->upper_levels= inherited;
4235     inherited= cond_equal;
4236   }
4237   *cond_equal_ref= cond_equal;
4238 
4239   if (join_list)
4240   {
4241     TABLE_LIST *table;
4242     List_iterator<TABLE_LIST> li(*join_list);
4243 
4244     while ((table= li++))
4245     {
4246       if (table->join_cond_optim())
4247       {
4248         List<TABLE_LIST> *nested_join_list= table->nested_join ?
4249           &table->nested_join->join_list : NULL;
4250         Item *join_cond;
4251         if (build_equal_items(thd, table->join_cond_optim(), &join_cond,
4252                               inherited, do_inherit,
4253                               nested_join_list, &table->cond_equal))
4254           return true;
4255         table->set_join_cond_optim(join_cond);
4256       }
4257     }
4258   }
4259 
4260   *retcond= cond;
4261   return false;
4262 }
4263 
4264 
4265 /**
4266   Compare field items by table order in the execution plan.
4267 
    field1 is considered better than field2 if the table containing
    field1 is accessed earlier than the table containing field2.
    The function finds out which of the two fields is better according
    to this criterion.
4272 
4273   @param field1          first field item to compare
4274   @param field2          second field item to compare
4275   @param table_join_idx  index to tables determining table order
4276 
4277   @retval
4278    -1  if field1 is better than field2
4279   @retval
4280     1  if field2 is better than field1
4281   @retval
4282     0  otherwise
4283 */
4284 
compare_fields_by_table_order(Item_field * field1,Item_field * field2,void * table_join_idx)4285 static int compare_fields_by_table_order(Item_field *field1,
4286                                   Item_field *field2,
4287                                   void *table_join_idx)
4288 {
4289   int cmp= 0;
4290   bool outer_ref= 0;
4291   if (field1->used_tables() & OUTER_REF_TABLE_BIT)
4292   {
4293     outer_ref= 1;
4294     cmp= -1;
4295   }
4296   if (field2->used_tables() & OUTER_REF_TABLE_BIT)
4297   {
4298     outer_ref= 1;
4299     cmp++;
4300   }
4301   if (outer_ref)
4302     return cmp;
4303   JOIN_TAB **idx= (JOIN_TAB **) table_join_idx;
4304 
4305   /*
4306     idx is NULL if this function was not called from JOIN::optimize()
4307     but from e.g. mysql_delete() or mysql_update(). In these cases
4308     there is only one table and both fields belong to it. Example
4309     condition where this is the case: t1.fld1=t1.fld2
4310   */
4311   if (!idx)
4312     return 0;
4313 
4314   // Locate JOIN_TABs thanks to table_join_idx, then compare their index.
4315   cmp= idx[field1->table_ref->tableno()]->idx() -
4316        idx[field2->table_ref->tableno()]->idx();
4317   return cmp < 0 ? -1 : (cmp ? 1 : 0);
4318 }
4319 
4320 
4321 /**
4322   Generate minimal set of simple equalities equivalent to a multiple equality.
4323 
4324     The function retrieves the fields of the multiple equality item
4325     item_equal and  for each field f:
4326     - if item_equal contains const it generates the equality f=const_item;
4327     - otherwise, if f is not the first field, generates the equality
4328       f=item_equal->get_first().
    All generated equalities are added to the cond conjunction.
4330 
4331   @param cond            condition to add the generated equality to
4332   @param upper_levels    structure to access multiple equality of upper levels
4333   @param item_equal      multiple equality to generate simple equality from
4334 
4335   @note
    Before generating an equality, the function checks that it has not
    been generated for multiple equalities of the upper levels.
4338     E.g. for the following where condition
4339     WHERE a=5 AND ((a=b AND b=c) OR  c>4)
4340     the upper level AND condition will contain =(5,a),
4341     while the lower level AND condition will contain =(5,a,b,c).
4342     When splitting =(5,a,b,c) into a separate equality predicates
4343     we should omit 5=a, as we have it already in the upper level.
4344     The following where condition gives us a more complicated case:
4345     WHERE t1.a=t2.b AND t3.c=t4.d AND (t2.b=t3.c OR t4.e>5 ...) AND ...
4346     Given the tables are accessed in the order t1->t2->t3->t4 for
4347     the selected query execution plan the lower level multiple
4348     equality =(t1.a,t2.b,t3.c,t4.d) formally  should be converted to
    t1.a=t2.b AND t1.a=t3.c AND t1.a=t4.d. But t1.a=t2.b will be
    generated for the upper level. Also t3.c=t4.d will be generated there.
    So only t1.a=t3.c should be left in the lower level.
    If cond is equal to 0, then no more than one equality is generated
    and a pointer to it is returned as the result of the function.
4354 
4355   @return
4356     - The condition with generated simple equalities or
4357     a pointer to the simple generated equality, if success.
4358     - 0, otherwise.
4359 */
4360 
eliminate_item_equal(Item * cond,COND_EQUAL * upper_levels,Item_equal * item_equal)4361 static Item *eliminate_item_equal(Item *cond, COND_EQUAL *upper_levels,
4362                                   Item_equal *item_equal)
4363 {
4364   List<Item> eq_list;
4365   Item_func_eq *eq_item= NULL;
4366   if (((Item *) item_equal)->const_item() && !item_equal->val_int())
4367     return new Item_int((longlong) 0,1);
4368   Item *const item_const= item_equal->get_const();
4369   Item_equal_iterator it(*item_equal);
4370   if (!item_const)
4371   {
4372     /*
4373       If there is a const item, match all field items with the const item,
4374       otherwise match the second and subsequent field items with the first one:
4375     */
4376     it++;
4377   }
4378   Item_field *item_field; // Field to generate equality for.
4379   while ((item_field= it++))
4380   {
4381     /*
4382       Generate an equality of the form:
4383       item_field = some previous field in item_equal's list.
4384 
4385       First see if we really need to generate it:
4386     */
4387     Item_equal *const upper= item_field->find_item_equal(upper_levels);
4388     if (upper) // item_field is in this upper equality
4389     {
4390       if (item_const && upper->get_const())
4391         continue; // Const at both levels, no need to generate at current level
4392       /*
4393         If the upper-level multiple equality contains this item, there is no
4394         need to generate the equality, unless item_field belongs to a
4395         semi-join nest that is used for Materialization, and refers to tables
4396         that are outside of the materialized semi-join nest,
4397         As noted in Item_equal::get_subst_item(), subquery materialization
4398         does not have this problem.
4399       */
4400       JOIN_TAB *const tab= item_field->field->table->reginfo.join_tab;
4401 
4402       if (!(tab && sj_is_materialize_strategy(tab->get_sj_strategy())))
4403       {
4404         Item_field *item_match;
4405         Item_equal_iterator li(*item_equal);
4406         while ((item_match= li++) != item_field)
4407         {
4408           if (item_match->find_item_equal(upper_levels) == upper)
4409             break; // (item_match, item_field) is also in upper level equality
4410         }
4411         if (item_match != item_field)
4412           continue;
4413       }
4414     } // ... if (upper).
4415 
4416     /*
4417       item_field should be compared with the head of the multiple equality
4418       list.
4419       item_field may refer to a table that is within a semijoin materialization
4420       nest. In that case, the order of the join_tab entries may look like:
4421 
4422         ot1 ot2 <subquery> ot5 SJM(it3 it4)
4423 
4424       If we have a multiple equality
4425 
4426         (ot1.c1, ot2.c2, <subquery>.c it3.c3, it4.c4, ot5.c5),
4427 
4428       we should generate the following equalities:
4429         1. ot1.c1 = ot2.c2
4430         2. ot1.c1 = <subquery>.c
4431         3. it3.c3 = it4.c4
4432         4. ot1.c1 = ot5.c5
4433 
4434       Equalities 1) and 4) are regular equalities between two outer tables.
4435       Equality 2) is an equality that matches the outer query with a
4436       materialized temporary table. It is either performed as a lookup
4437       into the materialized table (SJM-lookup), or as a condition on the
4438       outer table (SJM-scan).
4439       Equality 3) is evaluated during semijoin materialization.
4440 
4441       If there is a const item, match against this one.
4442       Otherwise, match against the first field item in the multiple equality,
4443       unless the item is within a materialized semijoin nest, in case it will
4444       be matched against the first item within the SJM nest.
4445       @see JOIN::set_prefix_tables()
4446       @see Item_equal::get_subst_item()
4447     */
4448 
4449     Item *const head=
4450       item_const ? item_const : item_equal->get_subst_item(item_field);
4451     if (head == item_field)
4452       continue;
4453 
4454     // we have a pair, can generate 'item_field=head'
4455     if (eq_item)
4456       eq_list.push_back(eq_item);
4457 
4458     eq_item= new Item_func_eq(item_field, head);
4459     if (!eq_item || eq_item->set_cmp_func())
4460       return NULL;
4461     eq_item->quick_fix_field();
4462   } // ... while ((item_field= it++))
4463 
4464   if (!cond && !eq_list.head())
4465   {
4466     if (!eq_item)
4467       return new Item_int((longlong) 1,1);
4468     return eq_item;
4469   }
4470 
4471   if (eq_item)
4472     eq_list.push_back(eq_item);
4473   if (!cond)
4474     cond= new Item_cond_and(eq_list);
4475   else
4476   {
4477     assert(cond->type() == Item::COND_ITEM);
4478     if (eq_list.elements)
4479       ((Item_cond *) cond)->add_at_head(&eq_list);
4480   }
4481 
4482   cond->quick_fix_field();
4483   cond->update_used_tables();
4484 
4485   return cond;
4486 }
4487 
4488 
4489 /**
4490   Substitute every field reference in a condition by the best equal field
4491   and eliminate all multiple equality predicates.
4492 
4493     The function retrieves the cond condition and for each encountered
4494     multiple equality predicate it sorts the field references in it
4495     according to the order of tables specified by the table_join_idx
    parameter. Then it eliminates the multiple equality predicate,
    replacing it by the conjunction of simple equality predicates
    equating every field from the multiple equality to the first
    field in it, or to the constant, if there is any.
    After this the function retrieves all other conjuncted
    predicates and substitutes every field reference by the field reference
    to the first equal field or equal constant, if there is any.
4503 
4504   @param cond            condition to process
4505   @param cond_equal      multiple equalities to take into consideration
4506   @param table_join_idx  index to tables determining field preference
4507 
4508   @note
    At first glance, a full sort of the fields in a multiple equality
    seems to be overkill. Yet it is not, due to possible
    new fields in multiple equality items of lower levels. We want
    the order in them to comply with the order of upper levels.
4513 
4514   @return
4515     The transformed condition, or NULL in case of error
4516 */
4517 
substitute_for_best_equal_field(Item * cond,COND_EQUAL * cond_equal,void * table_join_idx)4518 Item* substitute_for_best_equal_field(Item *cond,
4519                                       COND_EQUAL *cond_equal,
4520                                       void *table_join_idx)
4521 {
4522   Item_equal *item_equal;
4523 
4524   if (cond->type() == Item::COND_ITEM)
4525   {
4526     List<Item> *cond_list= ((Item_cond*) cond)->argument_list();
4527 
4528     bool and_level= ((Item_cond*) cond)->functype() ==
4529                       Item_func::COND_AND_FUNC;
4530     if (and_level)
4531     {
4532       cond_equal= &((Item_cond_and *) cond)->cond_equal;
4533       cond_list->disjoin((List<Item> *) &cond_equal->current_level);
4534 
4535       List_iterator_fast<Item_equal> it(cond_equal->current_level);
4536       while ((item_equal= it++))
4537       {
4538         item_equal->sort(&compare_fields_by_table_order, table_join_idx);
4539       }
4540     }
4541 
4542     List_iterator<Item> li(*cond_list);
4543     Item *item;
4544     while ((item= li++))
4545     {
4546       Item *new_item= substitute_for_best_equal_field(item, cond_equal,
4547                                                       table_join_idx);
4548       if (new_item == NULL)
4549         return NULL;
4550       /*
4551         This works OK with PS/SP re-execution as changes are made to
4552         the arguments of AND/OR items only
4553       */
4554       if (new_item != item)
4555         li.replace(new_item);
4556     }
4557 
4558     if (and_level)
4559     {
4560       List_iterator_fast<Item_equal> it(cond_equal->current_level);
4561       while ((item_equal= it++))
4562       {
4563         cond= eliminate_item_equal(cond, cond_equal->upper_levels, item_equal);
4564         if (cond == NULL)
4565           return NULL;
4566         // This occurs when eliminate_item_equal() founds that cond is
4567         // always false and substitutes it with Item_int 0.
4568         // Due to this, value of item_equal will be 0, so just return it.
4569         if (cond->type() != Item::COND_ITEM)
4570           break;
4571       }
4572     }
4573     if (cond->type() == Item::COND_ITEM &&
4574         !((Item_cond*)cond)->argument_list()->elements)
4575       cond= new Item_int((int32)cond->val_bool());
4576 
4577   }
4578   else if (cond->type() == Item::FUNC_ITEM &&
4579            ((Item_cond*) cond)->functype() == Item_func::MULT_EQUAL_FUNC)
4580   {
4581     item_equal= (Item_equal *) cond;
4582     item_equal->sort(&compare_fields_by_table_order, table_join_idx);
4583     if (cond_equal && cond_equal->current_level.head() == item_equal)
4584       cond_equal= cond_equal->upper_levels;
4585     return eliminate_item_equal(0, cond_equal, item_equal);
4586   }
4587   else
4588     cond->transform(&Item::replace_equal_field, 0);
4589   return cond;
4590 }
4591 
4592 
4593 /**
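  Change field = field to field = const for each found field = const in the
  and_level.

  @param thd        Thread handler
  @param save_list  List that receives a COND_CMP entry for each equality
                    (EQ_FUNC or EQUAL_FUNC) in which the field was replaced,
                    unless that equality is and_father itself or its other
                    operand is constant
  @param and_father Item recorded in the COND_CMP entries; recursive calls
                    pass the enclosing AND item, or the argument itself when
                    the enclosing item is not an AND
  @param cond       Condition where fields are replaced with constant values
  @param field      The field that will be substituted
  @param value      The substitution value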
4603 
4604   @returns false if success, true if error
4605 */
4606 
4607 static bool
change_cond_ref_to_const(THD * thd,I_List<COND_CMP> * save_list,Item * and_father,Item * cond,Item * field,Item * value)4608 change_cond_ref_to_const(THD *thd, I_List<COND_CMP> *save_list,
4609                          Item *and_father, Item *cond,
4610                          Item *field, Item *value)
4611 {
4612   if (cond->type() == Item::COND_ITEM)
4613   {
4614     Item_cond *const item_cond= down_cast<Item_cond *>(cond);
4615     bool and_level= item_cond->functype() == Item_func::COND_AND_FUNC;
4616     List_iterator<Item> li(*item_cond->argument_list());
4617     Item *item;
4618     while ((item=li++))
4619     {
4620       if (change_cond_ref_to_const(thd, save_list,
4621                                    and_level ? cond : item,
4622                                    item, field, value))
4623         return true;
4624     }
4625     return false;
4626   }
4627   if (cond->eq_cmp_result() == Item::COND_OK)
4628     return false;                // Not a boolean function
4629 
4630   Item_bool_func2 *func= down_cast<Item_bool_func2 *>(cond);
4631   Item **args= func->arguments();
4632   Item *left_item=  args[0];
4633   Item *right_item= args[1];
4634   Item_func::Functype functype= func->functype();
4635 
4636   if (right_item->eq(field,0) && left_item != value &&
4637       right_item->cmp_context == field->cmp_context &&
4638       (left_item->result_type() != STRING_RESULT ||
4639        value->result_type() != STRING_RESULT ||
4640        left_item->collation.collation == value->collation.collation))
4641   {
4642     Item *const clone= value->clone_item();
4643     if (thd->is_error())
4644       return true;
4645 
4646     if (clone == NULL)
4647       return false;
4648 
4649     clone->collation.set(right_item->collation);
4650     thd->change_item_tree(args + 1, clone);
4651     func->update_used_tables();
4652     if ((functype == Item_func::EQ_FUNC ||
4653          functype == Item_func::EQUAL_FUNC) &&
4654         and_father != cond && !left_item->const_item())
4655     {
4656       cond->marker=1;
4657       COND_CMP *const cond_cmp= new COND_CMP(and_father,func);
4658       if (cond_cmp == NULL)
4659         return true;
4660 
4661       save_list->push_back(cond_cmp);
4662 
4663     }
4664     if (func->set_cmp_func())
4665       return true;
4666   }
4667   else if (left_item->eq(field,0) && right_item != value &&
4668            left_item->cmp_context == field->cmp_context &&
4669            (right_item->result_type() != STRING_RESULT ||
4670             value->result_type() != STRING_RESULT ||
4671             right_item->collation.collation == value->collation.collation))
4672   {
4673     Item *const clone= value->clone_item();
4674     if (thd->is_error())
4675       return true;
4676 
4677     if (clone == NULL)
4678       return false;
4679 
4680     clone->collation.set(left_item->collation);
4681     thd->change_item_tree(args, clone);
4682     value= clone;
4683     func->update_used_tables();
4684     if ((functype == Item_func::EQ_FUNC ||
4685          functype == Item_func::EQUAL_FUNC) &&
4686         and_father != cond && !right_item->const_item())
4687     {
4688       args[0]= args[1];                       // For easy check
4689       thd->change_item_tree(args + 1, value);
4690       cond->marker=1;
4691       COND_CMP *const cond_cmp= new COND_CMP(and_father,func);
4692       if (cond_cmp == NULL)
4693         return true;
4694 
4695       save_list->push_back(cond_cmp);
4696     }
4697     if (func->set_cmp_func())
4698       return true;
4699   }
4700   return false;
4701 }
4702 
4703 /**
4704   Propagate constant values in a condition
4705 
4706   @param thd        Thread handler
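  @param save_list  List handed to change_cond_ref_to_const() to collect
                    COND_CMP entries
  @param and_father Recursive calls pass the enclosing AND item when cond is
                    an argument of an AND, otherwise cond itself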
4709   @param cond       Condition for which constant values are propagated
4710 
4711   @returns false if success, true if error
4712 */
4713 static bool
propagate_cond_constants(THD * thd,I_List<COND_CMP> * save_list,Item * and_father,Item * cond)4714 propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list,
4715                          Item *and_father, Item *cond)
4716 {
4717   if (cond->type() == Item::COND_ITEM)
4718   {
4719     Item_cond *const item_cond= down_cast<Item_cond *>(cond);
4720     bool and_level= item_cond->functype() == Item_func::COND_AND_FUNC;
4721     List_iterator_fast<Item> li(*item_cond->argument_list());
4722     Item *item;
4723     I_List<COND_CMP> save;
4724     while ((item=li++))
4725     {
4726       if (propagate_cond_constants(thd, &save, and_level ? cond : item, item))
4727         return true;
4728     }
4729     if (and_level)
4730     {						// Handle other found items
4731       I_List_iterator<COND_CMP> cond_itr(save);
4732       COND_CMP *cond_cmp;
4733       while ((cond_cmp= cond_itr++))
4734       {
4735         Item **args= cond_cmp->cmp_func->arguments();
4736         if (!args[0]->const_item() &&
4737             change_cond_ref_to_const(thd, &save, cond_cmp->and_level,
4738                                      cond_cmp->and_level, args[0], args[1]))
4739           return true;
4740       }
4741     }
4742   }
4743   else if (and_father != cond && !cond->marker)		// In a AND group
4744   {
4745     Item_func *func;
4746     if (cond->type() == Item::FUNC_ITEM &&
4747         (func= down_cast<Item_func *>(cond)) &&
4748 	(func->functype() == Item_func::EQ_FUNC ||
4749 	 func->functype() == Item_func::EQUAL_FUNC))
4750     {
4751       Item **args= func->arguments();
4752       bool left_const= args[0]->const_item();
4753       bool right_const= args[1]->const_item();
4754       if (!(left_const && right_const) &&
4755           args[0]->result_type() == args[1]->result_type())
4756       {
4757 	if (right_const)
4758 	{
4759           if (resolve_const_item(thd, &args[1], args[0]))
4760             return true;
4761 	  func->update_used_tables();
4762           if (change_cond_ref_to_const(thd, save_list, and_father, and_father,
4763                                        args[0], args[1]))
4764             return true;
4765 	}
4766 	else if (left_const)
4767 	{
4768           if (resolve_const_item(thd, &args[0], args[1]))
4769             return true;
4770 	  func->update_used_tables();
4771           if (change_cond_ref_to_const(thd, save_list, and_father, and_father,
4772                                        args[1], args[0]))
4773             return true;
4774 	}
4775       }
4776     }
4777   }
4778 
4779   return false;
4780 }
4781 
4782 
4783 /**
4784   Assign each nested join structure a bit in nested_join_map.
4785 
4786   @param join_list     List of tables
4787   @param first_unused  Number of first unused bit in nested_join_map before the
4788                        call
4789 
4790   @note
4791     This function is called after simplify_joins(), when there are no
4792     redundant nested joins.
4793     We cannot have more nested joins in a query block than there are tables,
4794     so as long as the number of bits in nested_join_map is not less than the
4795     maximum number of tables in a query block, nested_join_map can never
4796     overflow.
4797 
4798   @return
4799     First unused bit in nested_join_map after the call.
4800 */
4801 
build_bitmap_for_nested_joins(List<TABLE_LIST> * join_list,uint first_unused)4802 uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list,
4803                                    uint first_unused)
4804 {
4805   List_iterator<TABLE_LIST> li(*join_list);
4806   TABLE_LIST *table;
4807   DBUG_ENTER("build_bitmap_for_nested_joins");
4808   while ((table= li++))
4809   {
4810     NESTED_JOIN *nested_join;
4811     if ((nested_join= table->nested_join))
4812     {
4813       // We should have either a join condition or a semi-join condition
4814       assert((table->join_cond() == NULL) == (table->sj_cond() != NULL));
4815 
4816       nested_join->nj_map= 0;
4817       nested_join->nj_total= 0;
4818       /*
4819         We only record nested join information for outer join nests.
4820         Tables belonging in semi-join nests are recorded in the
4821         embedding outer join nest, if one exists.
4822       */
4823       if (table->join_cond())
4824       {
4825         assert(first_unused < sizeof(nested_join_map)*8);
4826         nested_join->nj_map= (nested_join_map) 1 << first_unused++;
4827         nested_join->nj_total= nested_join->join_list.elements;
4828       }
4829       else if (table->sj_cond())
4830       {
4831         NESTED_JOIN *const outer_nest=
4832           table->embedding ? table->embedding->nested_join : NULL;
4833         /*
4834           The semi-join nest has already been counted into the table count
4835           for the outer join nest as one table, so subtract 1 from the
4836           table count.
4837         */
4838         if (outer_nest)
4839           outer_nest->nj_total+= (nested_join->join_list.elements - 1);
4840       }
4841       else
4842         assert(false);
4843 
4844       first_unused= build_bitmap_for_nested_joins(&nested_join->join_list,
4845                                                   first_unused);
4846     }
4847   }
4848   DBUG_RETURN(first_unused);
4849 }
4850 
4851 
4852 /** Update the dependency map for the tables. */
4853 
update_depend_map()4854 void JOIN::update_depend_map()
4855 {
4856   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
4857   for (uint tableno = 0; tableno < tables; tableno++)
4858   {
4859     JOIN_TAB *const tab= best_ref[tableno];
4860     TABLE_REF *const ref= &tab->ref();
4861     table_map depend_map= 0;
4862     Item **item= ref->items;
4863     for (uint i = 0; i < ref->key_parts; i++, item++)
4864       depend_map|= (*item)->used_tables();
4865     depend_map&= ~PSEUDO_TABLE_BITS;
4866     ref->depend_map= depend_map;
4867     for (JOIN_TAB **tab2= map2table; depend_map; tab2++, depend_map >>= 1)
4868     {
4869       if (depend_map & 1)
4870 	ref->depend_map|= (*tab2)->ref().depend_map;
4871     }
4872   }
4873 }
4874 
4875 
4876 /** Update the dependency map for the sort order. */
4877 
update_depend_map(ORDER * order)4878 void JOIN::update_depend_map(ORDER *order)
4879 {
4880   for (; order ; order=order->next)
4881   {
4882     table_map depend_map;
4883     order->item[0]->update_used_tables();
4884     order->depend_map= depend_map=
4885       order->item[0]->used_tables() & ~PARAM_TABLE_BIT;
4886     order->used= 0;
4887     // Not item_sum(), RAND() and no reference to table outside of sub select
4888     if (!(order->depend_map & (OUTER_REF_TABLE_BIT | RAND_TABLE_BIT))
4889         && !order->item[0]->with_sum_func)
4890     {
4891       for (JOIN_TAB **tab= map2table; depend_map; tab++, depend_map >>= 1)
4892       {
4893 	if (depend_map & 1)
4894 	  order->depend_map|=(*tab)->ref().depend_map;
4895       }
4896     }
4897   }
4898 }
4899 
4900 
4901 /**
4902   Update equalities and keyuse references after semi-join materialization
4903   strategy is chosen.
4904 
4905   @details
4906     For each multiple equality that contains a field that is selected
4907     from a subquery, and that subquery is executed using a semi-join
4908     materialization strategy, add the corresponding column in the materialized
4909     temporary table to the equality.
4910     For each injected semi-join equality that is not converted to
4911     multiple equality, replace the reference to the expression selected
4912     from the subquery with the corresponding column in the temporary table.
4913 
4914     This is needed to properly reflect the equalities that involve injected
4915     semi-join equalities when materialization strategy is chosen.
4916     @see eliminate_item_equal() for how these equalities are used to generate
4917     correct equality predicates.
4918 
4919     The MaterializeScan semi-join strategy requires some additional processing:
4920     All primary tables after the materialized temporary table must be inspected
4921     for keyuse objects that point to expressions from the subquery tables.
4922     These references must be replaced with references to corresponding columns
4923     in the materialized temporary table instead. Those primary tables using
4924     ref access will thus be made to depend on the materialized temporary table
4925     instead of the subquery tables.
4926 
4927     Only the injected semi-join equalities need this treatment, other predicates
4928     will be handled correctly by the regular item substitution process.
4929 
4930   @return False if success, true if error
4931 */
4932 
update_equalities_for_sjm()4933 bool JOIN::update_equalities_for_sjm()
4934 {
4935   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
4936   List_iterator<Semijoin_mat_exec> it(sjm_exec_list);
4937   Semijoin_mat_exec *sjm_exec;
4938   while ((sjm_exec= it++))
4939   {
4940     TABLE_LIST *const sj_nest= sjm_exec->sj_nest;
4941 
4942     assert(!sj_nest->outer_join_nest());
4943     /*
4944       A materialized semi-join nest cannot actually be an inner part of an
4945       outer join yet, this is just a preparatory step,
4946       ie sj_nest->outer_join_nest() is always NULL here.
4947       @todo: Enable outer joining here later.
4948     */
4949     Item *cond= sj_nest->outer_join_nest() ?
4950       sj_nest->outer_join_nest()->join_cond_optim() : where_cond;
4951     if (!cond)
4952       continue;
4953 
4954     uchar *dummy= NULL;
4955     cond= cond->compile(&Item::equality_substitution_analyzer, &dummy,
4956                         &Item::equality_substitution_transformer,
4957                         (uchar *)sj_nest);
4958     if (cond == NULL)
4959       return true;
4960 
4961     cond->update_used_tables();
4962 
4963     // Loop over all primary tables that follow the materialized table
4964     for (uint j= sjm_exec->mat_table_index + 1; j < primary_tables; j++)
4965     {
4966       JOIN_TAB *const tab= best_ref[j];
4967       for (Key_use *keyuse= tab->position()->key;
4968            keyuse && keyuse->table_ref == tab->table_ref &&
4969            keyuse->key == tab->position()->key->key;
4970            keyuse++)
4971       {
4972         List_iterator<Item> it(sj_nest->nested_join->sj_inner_exprs);
4973         Item *old;
4974         uint fieldno= 0;
4975         while ((old= it++))
4976         {
4977           if (old->real_item()->eq(keyuse->val->real_item(), false))
4978           {
4979             /*
4980               Replace the expression selected from the subquery with the
4981               corresponding column of the materialized temporary table.
4982             */
4983             keyuse->val= sj_nest->nested_join->sjm.mat_fields[fieldno];
4984             keyuse->used_tables= keyuse->val->used_tables();
4985             break;
4986           }
4987           fieldno++;
4988         }
4989       }
4990     }
4991   }
4992 
4993   return false;
4994 }
4995 
4996 
4997 /**
4998   Assign set of available (prefix) tables to all tables in query block.
4999   Also set added tables, ie the tables added in each JOIN_TAB compared to the
5000   previous JOIN_TAB.
5001   This function must be called for every query block after the table order
5002   has been determined.
5003 */
5004 
set_prefix_tables()5005 void JOIN::set_prefix_tables()
5006 {
5007   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
5008   assert(!plan_is_const());
5009   /*
5010     The const tables are available together with the first non-const table in
5011     the join order.
5012   */
5013   table_map const initial_tables_map= const_table_map |
5014     (allow_outer_refs ? OUTER_REF_TABLE_BIT : 0);
5015 
5016   table_map current_tables_map= initial_tables_map;
5017   table_map prev_tables_map= (table_map) 0;
5018   table_map saved_tables_map= (table_map) 0;
5019 
5020   JOIN_TAB *last_non_sjm_tab= NULL; // Track the last non-sjm table
5021 
5022   for (uint i= const_tables; i < tables; i++)
5023   {
5024     JOIN_TAB *const tab= best_ref[i];
5025     if (!tab->table())
5026       continue;
5027     /*
5028       Tables that are within SJ-Materialization nests cannot have their
5029       conditions referring to preceding non-const tables.
5030        - If we're looking at the first SJM table, reset current_tables_map
5031          to refer to only allowed tables
5032       @see Item_equal::get_subst_item()
5033       @see eliminate_item_equal()
5034     */
5035     if (sj_is_materialize_strategy(tab->get_sj_strategy()))
5036     {
5037       const table_map sjm_inner_tables= tab->emb_sj_nest->sj_inner_tables;
5038       if (!(sjm_inner_tables & current_tables_map))
5039       {
5040         saved_tables_map= current_tables_map;
5041         current_tables_map= initial_tables_map;
5042         prev_tables_map= (table_map) 0;
5043       }
5044 
5045       current_tables_map|= tab->table_ref->map();
5046       tab->set_prefix_tables(current_tables_map, prev_tables_map);
5047       prev_tables_map= current_tables_map;
5048 
5049       if (!(sjm_inner_tables & ~current_tables_map))
5050       {
5051         /*
5052           At the end of a semi-join materialization nest,
5053           add non-deterministic expressions to the last table of the nest:
5054         */
5055         tab->add_prefix_tables(RAND_TABLE_BIT);
5056 
5057         // Restore the previous map:
5058         current_tables_map= saved_tables_map;
5059         prev_tables_map= last_non_sjm_tab ?
5060                          last_non_sjm_tab->prefix_tables() : (table_map) 0;
5061       }
5062     }
5063     else
5064     {
5065       last_non_sjm_tab= tab;
5066       current_tables_map|= tab->table_ref->map();
5067       tab->set_prefix_tables(current_tables_map, prev_tables_map);
5068       prev_tables_map= current_tables_map;
5069     }
5070   }
5071   /*
5072     Non-deterministic expressions must be added to the last table's condition.
5073     It solves problem with queries like SELECT * FROM t1 WHERE rand() > 0.5
5074   */
5075   if (last_non_sjm_tab != NULL)
5076     last_non_sjm_tab->add_prefix_tables(RAND_TABLE_BIT);
5077 }
5078 
5079 
5080 /**
5081   Calculate best possible join order and initialize the join structure.
5082 
  @return false if success, true if error.
5084 
5085   The JOIN object is populated with statistics about the query,
5086   and a plan with table order and access method selection is made.
5087 
5088   The list of tables to be optimized is taken from select_lex->leaf_tables.
5089   JOIN::where_cond is also used in the optimization.
5090   As a side-effect, JOIN::keyuse_array is populated with key_use information.
5091 
5092   Here is an overview of the logic of this function:
5093 
5094   - Initialize JOIN data structures and setup basic dependencies between tables.
5095 
5096   - Update dependencies based on join information.
5097 
5098   - Make key descriptions (update_ref_and_keys()).
5099 
5100   - Pull out semi-join tables based on table dependencies.
5101 
5102   - Extract tables with zero or one rows as const tables.
5103 
5104   - Read contents of const tables, substitute columns from these tables with
5105     actual data. Also keep track of empty tables vs. one-row tables.
5106 
5107   - After const table extraction based on row count, more tables may
5108     have become functionally dependent. Extract these as const tables.
5109 
5110   - Add new sargable predicates based on retrieved const values.
5111 
5112   - Calculate number of rows to be retrieved from each table.
5113 
5114   - Calculate cost of potential semi-join materializations.
5115 
5116   - Calculate best possible join order based on available statistics.
5117 
5118   - Fill in remaining information for the generated join order.
5119 */
5120 
make_join_plan()5121 bool JOIN::make_join_plan()
5122 {
5123   DBUG_ENTER("JOIN::make_join_plan");
5124 
5125   SARGABLE_PARAM *sargables= NULL;
5126 
5127   Opt_trace_context * const trace= &thd->opt_trace;
5128 
5129   if (init_planner_arrays())           // Create and initialize the arrays
5130     DBUG_RETURN(true);
5131 
5132   // Outer join dependencies were initialized above, now complete the analysis.
5133   if (select_lex->outer_join)
5134     propagate_dependencies();
5135 
5136   if (unlikely(trace->is_started()))
5137     trace_table_dependencies(trace, join_tab, primary_tables);
5138 
5139   // Build the key access information, which is the basis for ref access.
5140   if (where_cond || select_lex->outer_join)
5141   {
5142     if (update_ref_and_keys(thd, &keyuse_array, join_tab, tables, where_cond,
5143                             cond_equal, ~select_lex->outer_join, select_lex,
5144                             &sargables))
5145       DBUG_RETURN(true);
5146   }
5147 
5148   /*
5149     Pull out semi-join tables based on dependencies. Dependencies are valid
5150     throughout the lifetime of a query, so this operation can be performed
5151     on the first optimization only.
5152   */
5153   if (!select_lex->sj_pullout_done && select_lex->sj_nests.elements &&
5154       pull_out_semijoin_tables(this))
5155     DBUG_RETURN(true);
5156 
5157   select_lex->sj_pullout_done= true;
5158   const uint sj_nests= select_lex->sj_nests.elements; // Changed by pull-out
5159 
5160   if (!(select_lex->active_options() & OPTION_NO_CONST_TABLES))
5161   {
5162     // Detect tables that are const (0 or 1 row) and read their contents.
5163     if (extract_const_tables())
5164       DBUG_RETURN(true);
5165 
5166     // Detect tables that are functionally dependent on const values.
5167     if (extract_func_dependent_tables())
5168       DBUG_RETURN(true);
5169   }
5170   // Possibly able to create more sargable predicates from const rows.
5171   if (const_tables && sargables)
5172     update_sargable_from_const(sargables);
5173 
5174   // Make a first estimate of the fanout for each table in the query block.
5175   if (estimate_rowcount())
5176     DBUG_RETURN(true);
5177 
5178   if (sj_nests)
5179   {
5180     set_semijoin_embedding();
5181     select_lex->update_semijoin_strategies(thd);
5182   }
5183 
5184   if (!plan_is_const())
5185     optimize_keyuse();
5186 
5187   allow_outer_refs= true;
5188 
5189   if (sj_nests && optimize_semijoin_nests_for_materialization(this))
5190     DBUG_RETURN(true);
5191 
5192   // Choose the table order based on analysis done so far.
5193   if (Optimize_table_order(thd, this, NULL).choose_table_order())
5194     DBUG_RETURN(true);
5195 
5196   DBUG_EXECUTE_IF("bug13820776_1", thd->killed= THD::KILL_QUERY;);
5197   if (thd->killed || thd->is_error())
5198     DBUG_RETURN(true);
5199 
5200   // If this is a subquery, decide between In-to-exists and materialization
5201   if (unit->item && decide_subquery_strategy())
5202     DBUG_RETURN(true);
5203 
5204   refine_best_rowcount();
5205 
5206   if (!(thd->variables.option_bits & OPTION_BIG_SELECTS) &&
5207       best_read > (double) thd->variables.max_join_size &&
5208       !thd->lex->is_explain())
5209   {						/* purecov: inspected */
5210     my_message(ER_TOO_BIG_SELECT, ER(ER_TOO_BIG_SELECT), MYF(0));
5211     error= -1;
5212     DBUG_RETURN(1);
5213   }
5214 
5215   positions= NULL;  // But keep best_positions for get_best_combination
5216 
5217   /*
5218     Store the cost of this query into a user variable
5219     Don't update m_current_query_cost for statements that are not "flat joins" :
5220     i.e. they have subqueries, unions or call stored procedures.
5221     TODO: calculate a correct cost for a query with subqueries and UNIONs.
5222   */
5223   if (thd->lex->is_single_level_stmt())
5224     thd->m_current_query_cost= best_read;
5225 
5226   // Generate an execution plan from the found optimal join order.
5227   if (get_best_combination())
5228     DBUG_RETURN(true);
5229 
5230   // Cleanup after update_ref_and_keys has added keys for derived tables.
5231   if (select_lex->materialized_derived_table_count)
5232     drop_unused_derived_keys();
5233 
5234   // No need for this struct after new JOIN_TAB array is set up.
5235   best_positions= NULL;
5236 
5237   // Some called function may still set error status unnoticed
5238   if (thd->is_error())
5239     DBUG_RETURN(true);
5240 
5241   // There is at least one empty const table
5242   if (const_table_map != found_const_table_map)
5243     zero_result_cause= "no matching row in const table";
5244 
5245   DBUG_RETURN(false);
5246 }
5247 
5248 
5249 /**
5250   Initialize scratch arrays for the join order optimization
5251 
5252   @returns false if success, true if error
5253 
5254   @note If something fails during initialization, JOIN::cleanup()
5255         will free anything that has been partially allocated and set up.
5256         Arrays are created in the execution mem_root, so they will be
5257         deleted automatically when the mem_root is re-initialized.
5258 */
5259 
init_planner_arrays()5260 bool JOIN::init_planner_arrays()
5261 {
5262   // Up to one extra slot per semi-join nest is needed (if materialized)
5263   const uint sj_nests= select_lex->sj_nests.elements;
5264   const uint table_count= select_lex->leaf_table_count;
5265 
5266   assert(primary_tables == 0 && tables == 0);
5267 
5268   if (!(join_tab= alloc_jtab_array(thd, table_count)))
5269     return true;
5270 
5271   /*
5272     We add 2 cells:
5273     - because planning stage uses 0-termination so needs +1
5274     - because after get_best_combination, we don't use 0-termination but
5275     need +2, to host at most 2 tmp sort/group/distinct tables.
5276   */
5277   if (!(best_ref= (JOIN_TAB **) thd->alloc(sizeof(JOIN_TAB *) *
5278                                            (table_count + sj_nests + 2))))
5279     return true;
5280 
5281   // sort/group tmp tables have no map
5282   if (!(map2table= (JOIN_TAB **) thd->alloc(sizeof(JOIN_TAB *) *
5283                                            (table_count + sj_nests))))
5284     return true;
5285 
5286   if (!(positions= new (thd->mem_root) POSITION[table_count]))
5287     return true;
5288 
5289   if (!(best_positions= new (thd->mem_root) POSITION[table_count+sj_nests]))
5290     return true;
5291 
5292   /*
5293     Initialize data structures for tables to be joined.
5294     Initialize dependencies between tables.
5295   */
5296   JOIN_TAB **best_ref_p= best_ref;
5297   TABLE_LIST *tl= select_lex->leaf_tables;
5298 
5299   for (JOIN_TAB *tab= join_tab;
5300        tl;
5301        tab++, tl= tl->next_leaf, best_ref_p++)
5302   {
5303     *best_ref_p= tab;
5304     TABLE *const table= tl->table;
5305     tab->table_ref= tl;
5306     tab->set_table(table);
5307     const int err= tl->fetch_number_of_rows();
5308 
5309     // Initialize the cost model for the table
5310     table->init_cost_model(cost_model());
5311 
5312     DBUG_EXECUTE_IF("bug11747970_raise_error",
5313                     {
5314                       if (!err)
5315                       {
5316                         my_error(ER_UNKNOWN_ERROR, MYF(0));
5317                         return true;
5318                       }
5319                     });
5320 
5321     if (err)
5322     {
5323       table->file->print_error(err, MYF(0));
5324       return true;
5325     }
5326     table->quick_keys.clear_all();
5327     table->possible_quick_keys.clear_all();
5328     table->reginfo.not_exists_optimize= false;
5329     memset(table->const_key_parts, 0, sizeof(key_part_map)*table->s->keys);
5330     all_table_map|= tl->map();
5331     tab->set_join(this);
5332 
5333     tab->dependent= tl->dep_tables;  // Initialize table dependencies
5334     if (tl->schema_table)
5335       table->file->stats.records= 2;
5336     table->quick_condition_rows= table->file->stats.records;
5337 
5338     tab->init_join_cond_ref(tl);
5339 
5340     if (tl->outer_join_nest())
5341     {
5342       // tab belongs to a nested join, maybe to several embedding joins
5343       tab->embedding_map= 0;
5344       for (TABLE_LIST *embedding= tl->embedding;
5345            embedding;
5346            embedding= embedding->embedding)
5347       {
5348         NESTED_JOIN *const nested_join= embedding->nested_join;
5349         tab->embedding_map|= nested_join->nj_map;
5350         tab->dependent|= embedding->dep_tables;
5351       }
5352     }
5353     else if (tab->join_cond())
5354     {
5355       // tab is the only inner table of an outer join
5356       tab->embedding_map= 0;
5357       for (TABLE_LIST *embedding= tl->embedding;
5358            embedding;
5359            embedding= embedding->embedding)
5360         tab->embedding_map|= embedding->nested_join->nj_map;
5361     }
5362     tables++;                     // Count number of initialized tables
5363   }
5364 
5365   primary_tables= tables;
5366   *best_ref_p= NULL;              // Last element of array must be NULL
5367 
5368   return false;
5369 }
5370 
5371 
5372 /**
5373   Propagate dependencies between tables due to outer join relations.
5374 
5375   @returns false if success, true if error
5376 
5377   Build transitive closure for relation 'to be dependent on'.
5378   This will speed up the plan search for many cases with outer joins,
5379   as well as allow us to catch illegal cross references.
5380   Warshall's algorithm is used to build the transitive closure.
  As we may restart the outer loop up to 'table_count' times, the
5382   complexity of the algorithm is O((number of tables)^3).
5383   However, most of the iterations will be shortcircuited when
5384   there are no dependencies to propagate.
5385 */
5386 
propagate_dependencies()5387 bool JOIN::propagate_dependencies()
5388 {
5389   for (uint i= 0; i < tables; i++)
5390   {
5391     if (!join_tab[i].dependent)
5392       continue;
5393 
5394     // Add my dependencies to other tables depending on me
5395     uint j;
5396     JOIN_TAB *tab;
5397     for (j= 0, tab= join_tab; j < tables; j++, tab++)
5398     {
5399       if (tab->dependent & join_tab[i].table_ref->map())
5400       {
5401         const table_map was_dependent= tab->dependent;
5402         tab->dependent|= join_tab[i].dependent;
5403         /*
5404           If we change dependencies for a table we already have
5405           processed: Redo dependency propagation from this table.
5406         */
5407         if (i > j && tab->dependent != was_dependent)
5408         {
5409           i= j-1;
5410           break;
5411         }
5412       }
5413     }
5414   }
5415 
5416   JOIN_TAB *const tab_end= join_tab + tables;
5417   for (JOIN_TAB *tab= join_tab; tab < tab_end; tab++)
5418   {
5419     /*
5420       Catch illegal cross references for outer joins.
5421       This could happen before WL#2486 was implemented in 5.0, but should no
5422       longer be possible.
5423       Thus, an assert has been added should this happen again.
5424       @todo Remove the error check below.
5425     */
5426     assert(!(tab->dependent & tab->table_ref->map()));
5427 
5428     if (tab->dependent & tab->table_ref->map())
5429     {
5430       tables= 0;               // Don't use join->table
5431       primary_tables= 0;
5432       my_message(ER_WRONG_OUTER_JOIN, ER(ER_WRONG_OUTER_JOIN), MYF(0));
5433       return true;
5434     }
5435 
5436     tab->key_dependent= tab->dependent;
5437   }
5438 
5439   return false;
5440 }
5441 
5442 
5443 /**
5444   Extract const tables based on row counts.
5445 
5446   @returns false if success, true if error
5447 
5448   This extraction must be done for each execution.
5449   Tables containing exactly zero or one rows are marked as const, but
5450   notice the additional constraints checked below.
5451   Tables that are extracted have their rows read before actual execution
5452   starts and are placed in the beginning of the join_tab array.
5453   Thus, they do not take part in join order optimization process,
5454   which can significantly reduce the optimization time.
5455   The data read from these tables can also be regarded as "constant"
5456   throughout query execution, hence the column values can be used for
5457   additional constant propagation and extraction of const tables based
5458   on eq-ref properties.
5459 
5460   The tables are given the type JT_SYSTEM.
5461 */
5462 
extract_const_tables()5463 bool JOIN::extract_const_tables()
5464 {
5465   enum enum_const_table_extraction
5466   {
5467      extract_no_table=    0,
5468      extract_empty_table= 1,
5469      extract_const_table= 2
5470   };
5471 
5472   JOIN_TAB *const tab_end= join_tab + tables;
5473   for (JOIN_TAB *tab= join_tab; tab < tab_end; tab++)
5474   {
5475     TABLE      *const table= tab->table();
5476     TABLE_LIST *const tl= tab->table_ref;
5477     enum enum_const_table_extraction extract_method= extract_const_table;
5478 
5479     const bool all_partitions_pruned_away= table->all_partitions_pruned_away;
5480 
5481     if (tl->outer_join_nest())
5482     {
5483       /*
5484         Table belongs to a nested join, no candidate for const table extraction.
5485       */
5486       extract_method= extract_no_table;
5487     }
5488     else if (tl->embedding && tl->embedding->sj_cond())
5489     {
5490       /*
5491         Table belongs to a semi-join.
5492         We do not currently pull out const tables from semi-join nests.
5493       */
5494       extract_method= extract_no_table;
5495     }
5496     else if (tab->join_cond())
5497     {
5498       // tab is the only inner table of an outer join, extract empty tables
5499       extract_method= extract_empty_table;
5500     }
5501     switch (extract_method)
5502     {
5503     case extract_no_table:
5504       break;
5505 
5506     case extract_empty_table:
5507       // Extract tables with zero rows, but only if statistics are exact
5508       if ((table->file->stats.records == 0 ||
5509            all_partitions_pruned_away) &&
5510           (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT))
5511         mark_const_table(tab, NULL);
5512       break;
5513 
5514     case extract_const_table:
5515       /*
5516         Extract tables with zero or one rows, but do not extract tables that
5517          1. are dependent upon other tables, or
5518          2. have no exact statistics, or
5519          3. are full-text searched
5520       */
5521       if ((table->s->system ||
5522            table->file->stats.records <= 1 ||
5523            all_partitions_pruned_away) &&
5524           !tab->dependent &&                                             // 1
5525           (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) && // 2
5526           !table->fulltext_searched)                                     // 3
5527         mark_const_table(tab, NULL);
5528       break;
5529     }
5530   }
5531 
5532   // Read const tables (tables matching no more than 1 rows)
5533   if (!const_tables)
5534     return false;
5535 
5536   for (POSITION *p_pos= positions, *p_end= p_pos + const_tables;
5537        p_pos < p_end;
5538        p_pos++)
5539   {
5540     JOIN_TAB *const tab= p_pos->table;
5541     const int status= join_read_const_table(tab, p_pos);
5542     if (status > 0)
5543       return true;
5544     else if (status == 0)
5545     {
5546       found_const_table_map|= tab->table_ref->map();
5547       tab->table_ref->optimized_away= true;
5548     }
5549   }
5550 
5551   return false;
5552 }
5553 
5554 /**
5555   Extract const tables based on functional dependencies.
5556 
5557   @returns false if success, true if error
5558 
5559   This extraction must be done for each execution.
5560 
5561   Mark as const the tables that
5562    - are functionally dependent on constant values, or
5563    - are inner tables of an outer join and contain exactly zero or one rows
5564 
5565   Tables that are extracted have their rows read before actual execution
5566   starts and are placed in the beginning of the join_tab array, just as
5567   described for JOIN::extract_const_tables().
5568 
5569   The tables are given the type JT_CONST.
5570 */
5571 
extract_func_dependent_tables()5572 bool JOIN::extract_func_dependent_tables()
5573 {
5574   // loop until no more const tables are found
5575   bool ref_changed;
5576   table_map found_ref;
5577   do
5578   {
5579   more_const_tables_found:
5580     ref_changed = false;
5581     found_ref= 0;
5582 
5583     // Loop over all tables that are not already determined to be const
5584     for (JOIN_TAB **pos= best_ref + const_tables; *pos; pos++)
5585     {
5586       JOIN_TAB *const tab= *pos;
5587       TABLE *const table= tab->table();
5588       TABLE_LIST *const tl= tab->table_ref;
5589       /*
5590         If equi-join condition by a key is null rejecting and after a
5591         substitution of a const table the key value happens to be null
5592         then we can state that there are no matches for this equi-join.
5593       */
5594       Key_use *keyuse= tab->keyuse();
5595       if (keyuse && tab->join_cond() && !tab->embedding_map)
5596       {
5597         /*
5598           When performing an outer join operation if there are no matching rows
5599           for the single row of the outer table all the inner tables are to be
5600           null complemented and thus considered as constant tables.
5601           Here we apply this consideration to the case of outer join operations
5602           with a single inner table only because the case with nested tables
5603           would require a more thorough analysis.
5604           TODO. Apply single row substitution to null complemented inner tables
5605           for nested outer join operations.
5606 	*/
5607         while (keyuse->table_ref == tl)
5608         {
5609           if (!(keyuse->val->used_tables() & ~const_table_map) &&
5610               keyuse->val->is_null() && keyuse->null_rejecting)
5611           {
5612             table->set_null_row();
5613             table->const_table= true;
5614             found_const_table_map|= tl->map();
5615             mark_const_table(tab, keyuse);
5616             goto more_const_tables_found;
5617            }
5618 	  keyuse++;
5619         }
5620       }
5621 
5622       if (tab->dependent)              // If dependent on some table
5623       {
5624         // All dependent tables must be const
5625         if (tab->dependent & ~const_table_map)
5626           continue;
5627         /*
5628           Mark a dependent table as constant if
5629            1. it has exactly zero or one rows (it is a system table), and
5630            2. it is not within a nested outer join, and
5631            3. it does not have an expensive outer join condition.
5632               This is because we have to determine whether an outer-joined table
5633               has a real row or a null-extended row in the optimizer phase.
5634               We have no possibility to evaluate its join condition at
5635               execution time, when it is marked as a system table.
5636         */
5637 	if (table->file->stats.records <= 1L &&                            // 1
5638             (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) && // 1
5639             !tl->outer_join_nest() &&                                      // 2
5640             !(tab->join_cond() && tab->join_cond()->is_expensive()))   // 3
5641 	{                              // system table
5642           mark_const_table(tab, NULL);
5643           const int status=
5644             join_read_const_table(tab, positions + const_tables - 1);
5645           if (status > 0)
5646             return true;
5647           else if (status == 0)
5648             found_const_table_map|= tl->map();
5649           continue;
5650         }
5651       }
5652 
5653       // Check if table can be read by key or table only uses const refs
5654 
5655       if ((keyuse= tab->keyuse()))
5656       {
5657         while (keyuse->table_ref == tl)
5658         {
5659           Key_use *const start_keyuse= keyuse;
5660           const uint key= keyuse->key;
5661           tab->keys().set_bit(key);               // QQ: remove this ?
5662 
5663           table_map refs= 0;
5664           key_map const_ref, eq_part;
5665           do
5666           {
5667             if (keyuse->val->type() != Item::NULL_ITEM && !keyuse->optimize)
5668             {
5669               if (!((~found_const_table_map) & keyuse->used_tables))
5670                 const_ref.set_bit(keyuse->keypart);
5671               else
5672                 refs|= keyuse->used_tables;
5673               eq_part.set_bit(keyuse->keypart);
5674             }
5675             keyuse++;
5676           } while (keyuse->table_ref == tl && keyuse->key == key);
5677 
5678           /*
5679             Extract const tables with proper key dependencies.
5680             Exclude tables that
5681              1. are full-text searched, or
5682              2. are part of nested outer join, or
5683              3. are part of semi-join, or
5684              4. have an expensive outer join condition.
5685              5. are blocked by handler for const table optimize.
5686           */
5687           if (eq_part.is_prefix(table->key_info[key].user_defined_key_parts) &&
5688               !table->fulltext_searched &&                           // 1
5689               !tl->outer_join_nest() &&                              // 2
5690               !(tl->embedding && tl->embedding->sj_cond()) &&        // 3
5691               !(tab->join_cond() && tab->join_cond()->is_expensive()) &&// 4
5692               !(table->file->ha_table_flags() & HA_BLOCK_CONST_TABLE))  // 5
5693           {
5694             if (table->key_info[key].flags & HA_NOSAME)
5695             {
5696               if (const_ref == eq_part)
5697               {                        // Found everything for ref.
5698                 ref_changed = true;
5699                 mark_const_table(tab, start_keyuse);
5700                 if (create_ref_for_key(this, tab, start_keyuse,
5701                                        found_const_table_map))
5702                   return true;
5703                 const int status=
5704                   join_read_const_table(tab, positions + const_tables - 1);
5705                 if (status > 0)
5706                   return true;
5707                 else if (status == 0)
5708                   found_const_table_map|= tl->map();
5709                 break;
5710               }
5711               else
5712                 found_ref|= refs;       // Table is const if all refs are const
5713             }
5714             else if (const_ref == eq_part)
5715               tab->const_keys.set_bit(key);
5716           }
5717 	}
5718       }
5719     }
5720   } while ((const_table_map & found_ref) && ref_changed);
5721 
5722   return false;
5723 }
5724 
5725 /**
5726   Update info on indexes that can be used for search lookups as
5727   reading const tables may has added new sargable predicates.
5728 */
5729 
update_sargable_from_const(SARGABLE_PARAM * sargables)5730 void JOIN::update_sargable_from_const(SARGABLE_PARAM *sargables)
5731 {
5732   for ( ; sargables->field; sargables++)
5733   {
5734     Field *const field= sargables->field;
5735     JOIN_TAB *const tab= field->table->reginfo.join_tab;
5736     key_map possible_keys= field->key_start;
5737     possible_keys.intersect(field->table->keys_in_use_for_query);
5738     bool is_const= true;
5739     for (uint j= 0; j < sargables->num_values; j++)
5740       is_const&= sargables->arg_value[j]->const_item();
5741     if (is_const)
5742     {
5743       tab->const_keys.merge(possible_keys);
5744       tab->keys().merge(possible_keys);
5745     }
5746   }
5747 }
5748 
5749 
5750 /**
5751   Estimate the number of matched rows for each joined table.
5752   Set up range scan for tables that have proper predicates.
5753 
5754   @returns false if success, true if error
5755 */
5756 
estimate_rowcount()5757 bool JOIN::estimate_rowcount()
5758 {
5759   Opt_trace_context *const trace= &thd->opt_trace;
5760   Opt_trace_object trace_wrapper(trace);
5761   Opt_trace_array trace_records(trace, "rows_estimation");
5762 
5763   JOIN_TAB *const tab_end= join_tab + tables;
5764   for (JOIN_TAB *tab= join_tab; tab < tab_end; tab++)
5765   {
5766     const Cost_model_table *const cost_model= tab->table()->cost_model();
5767     Opt_trace_object trace_table(trace);
5768     trace_table.add_utf8_table(tab->table_ref);
5769     if (tab->type() == JT_SYSTEM || tab->type() == JT_CONST)
5770     {
5771       trace_table.add("rows", 1).add("cost", 1)
5772         .add_alnum("table_type", (tab->type() == JT_SYSTEM) ? "system": "const")
5773         .add("empty", tab->table()->has_null_row());
5774 
5775       // Only one matching row and one block to read
5776       tab->set_records(tab->found_records= 1);
5777       tab->worst_seeks= cost_model->page_read_cost(1.0);
5778       tab->read_time= static_cast<ha_rows>(tab->worst_seeks);
5779       continue;
5780     }
5781     // Approximate number of found rows and cost to read them
5782     tab->set_records(tab->found_records= tab->table()->file->stats.records);
5783     const Cost_estimate table_scan_time= tab->table()->file->table_scan_cost();
5784     tab->read_time= static_cast<ha_rows>(table_scan_time.total_cost());
5785 
5786     /*
5787       Set a max value for the cost of seek operations we can expect
5788       when using key lookup. This can't be too high as otherwise we
5789       are likely to use table scan.
5790     */
5791     tab->worst_seeks=
5792       min(cost_model->page_read_cost((double) tab->found_records / 10),
5793           (double) tab->read_time * 3);
5794     const double min_worst_seek= cost_model->page_read_cost(2.0);
5795     if (tab->worst_seeks < min_worst_seek)      // Fix for small tables
5796       tab->worst_seeks= min_worst_seek;
5797 
5798     /*
5799       Add to tab->const_keys those indexes for which all group fields or
5800       all select distinct fields participate in one index.
5801     */
5802     add_group_and_distinct_keys(this, tab);
5803 
5804     /*
5805       Perform range analysis if there are keys it could use (1).
5806       Don't do range analysis if on the inner side of an outer join (2).
5807       Do range analysis if on the inner side of a semi-join (3).
5808     */
5809     TABLE_LIST *const tl= tab->table_ref;
5810     if (!tab->const_keys.is_clear_all() &&                        // (1)
5811         (!tl->embedding ||                                        // (2)
5812          (tl->embedding && tl->embedding->sj_cond())))            // (3)
5813     {
5814       /*
5815         This call fills tab->quick() with the best QUICK access method
5816         possible for this table, and only if it's better than table scan.
5817         It also fills tab->needed_reg.
5818       */
5819       ha_rows records= get_quick_record_count(thd, tab, row_limit);
5820 
5821       if (records == 0 && thd->is_error())
5822         return true;
5823 
5824       /*
5825         Check for "impossible range", but make sure that we do not attempt
5826         to mark semi-joined tables as "const" (only semi-joined tables that
5827         are functionally dependent can be marked "const", and subsequently
5828         pulled out of their semi-join nests).
5829       */
5830       if (records == 0 &&
5831           tab->table()->reginfo.impossible_range &&
5832           (!(tl->embedding && tl->embedding->sj_cond())))
5833       {
5834         /*
5835           Impossible WHERE condition or join condition
5836           In case of join cond, mark that one empty NULL row is matched.
5837           In case of WHERE, don't set found_const_table_map to get the
5838           caller to abort with a zero row result.
5839         */
5840         mark_const_table(tab, NULL);
5841         tab->set_type(JT_CONST);  // Override setting made in mark_const_table()
5842         if (tab->join_cond())
5843         {
5844           // Generate an empty row
5845           trace_table.add("returning_empty_null_row", true).
5846             add_alnum("cause", "impossible_on_condition");
5847           found_const_table_map|= tl->map();
5848           tab->table()->set_null_row();  // All fields are NULL
5849         }
5850         else
5851         {
5852           trace_table.add("rows", 0).
5853             add_alnum("cause", "impossible_where_condition");
5854         }
5855       }
5856       if (records != HA_POS_ERROR)
5857       {
5858         tab->found_records= records;
5859         tab->read_time= (ha_rows) (tab->quick() ?
5860                                    tab->quick()->cost_est.total_cost() : 0.0);
5861       }
5862     }
5863     else
5864     {
5865       Opt_trace_object(trace, "table_scan").
5866         add("rows", tab->found_records).
5867         add("cost", tab->read_time);
5868     }
5869   }
5870 
5871   return false;
5872 }
5873 
5874 
5875 /**
5876   Set semi-join embedding join nest pointers.
5877 
5878   Set pointer to embedding semi-join nest for all semi-joined tables.
5879   Note that this must be done for every table inside all semi-join nests,
5880   even for tables within outer join nests embedded in semi-join nests.
5881   A table can never be part of multiple semi-join nests, hence no
5882   ambiguities can ever occur.
5883   Note also that the pointer is not set for TABLE_LIST objects that
5884   are outer join nests within semi-join nests.
5885 */
5886 
set_semijoin_embedding()5887 void JOIN::set_semijoin_embedding()
5888 {
5889   assert(!select_lex->sj_nests.is_empty());
5890 
5891   JOIN_TAB *const tab_end= join_tab + primary_tables;
5892 
5893   for (JOIN_TAB *tab= join_tab; tab < tab_end; tab++)
5894   {
5895     for (TABLE_LIST *tl= tab->table_ref; tl->embedding; tl= tl->embedding)
5896     {
5897       if (tl->embedding->sj_cond())
5898       {
5899         tab->emb_sj_nest= tl->embedding;
5900         break;
5901       }
5902     }
5903   }
5904 }
5905 
5906 
5907 /**
5908   @brief Check if semijoin's compared types allow materialization.
5909 
5910   @param[inout] sj_nest Semi-join nest containing information about correlated
5911          expressions. Set nested_join->sjm.scan_allowed to TRUE if
5912          MaterializeScan strategy allowed. Set nested_join->sjm.lookup_allowed
5913          to TRUE if MaterializeLookup strategy allowed
5914 
5915   @details
5916     This is a temporary fix for BUG#36752.
5917 
5918     There are two subquery materialization strategies for semijoin:
5919 
5920     1. Materialize and do index lookups in the materialized table. See
5921        BUG#36752 for description of restrictions we need to put on the
5922        compared expressions.
5923 
5924        In addition, since indexes are not supported for BLOB columns,
5925        this strategy can not be used if any of the columns in the
5926        materialized table will be BLOB/GEOMETRY columns.  (Note that
5927        also columns for non-BLOB values that may be greater in size
5928        than CONVERT_IF_BIGGER_TO_BLOB, will be represented as BLOB
5929        columns.)
5930 
5931     2. Materialize and then do a full scan of the materialized table.
5932        The same criteria as for MaterializeLookup are applied, except that
5933        BLOB/GEOMETRY columns are allowed.
5934 */
5935 
5936 static
semijoin_types_allow_materialization(TABLE_LIST * sj_nest)5937 void semijoin_types_allow_materialization(TABLE_LIST *sj_nest)
5938 {
5939   DBUG_ENTER("semijoin_types_allow_materialization");
5940 
5941   assert(sj_nest->nested_join->sj_outer_exprs.elements ==
5942          sj_nest->nested_join->sj_inner_exprs.elements);
5943 
5944   if (sj_nest->nested_join->sj_outer_exprs.elements > MAX_REF_PARTS)
5945   {
5946     sj_nest->nested_join->sjm.scan_allowed= false;
5947     sj_nest->nested_join->sjm.lookup_allowed= false;
5948     DBUG_VOID_RETURN;
5949   }
5950 
5951   List_iterator<Item> it1(sj_nest->nested_join->sj_outer_exprs);
5952   List_iterator<Item> it2(sj_nest->nested_join->sj_inner_exprs);
5953 
5954   sj_nest->nested_join->sjm.scan_allowed= true;
5955   sj_nest->nested_join->sjm.lookup_allowed= true;
5956 
5957   bool blobs_involved= false;
5958   Item *outer, *inner;
5959   uint total_lookup_index_length= 0;
5960   uint max_key_length;
5961   uint max_key_part_length;
5962   /*
5963     Maximum lengths for keys and key parts that are supported by
5964     the temporary table storage engine(s).
5965   */
5966   get_max_key_and_part_length(&max_key_length,
5967                               &max_key_part_length);
5968   while (outer= it1++, inner= it2++)
5969   {
5970     assert(outer->real_item() && inner->real_item());
5971     if (!types_allow_materialization(outer, inner))
5972     {
5973       sj_nest->nested_join->sjm.scan_allowed= false;
5974       sj_nest->nested_join->sjm.lookup_allowed= false;
5975       DBUG_VOID_RETURN;
5976     }
5977     blobs_involved|= inner->is_blob_field();
5978 
5979     // Calculate the index length of materialized table
5980     const uint lookup_index_length= get_key_length_tmp_table(inner);
5981     if (lookup_index_length > max_key_part_length)
5982       sj_nest->nested_join->sjm.lookup_allowed= false;
5983     total_lookup_index_length+= lookup_index_length ;
5984   }
5985   if (total_lookup_index_length > max_key_length)
5986     sj_nest->nested_join->sjm.lookup_allowed= false;
5987 
5988   if (blobs_involved)
5989     sj_nest->nested_join->sjm.lookup_allowed= false;
5990 
5991   if (sj_nest->embedding)
5992   {
5993     assert(sj_nest->embedding->join_cond_optim());
5994     /*
5995       There are two issues that prevent materialization strategy from being
5996       used when a semi-join nest is on the inner side of an outer join:
5997       1. If the semi-join contains dependencies to outer tables,
5998          materialize-scan strategy cannot be used.
5999       2. Make sure that executor is able to evaluate triggered conditions
6000          for semi-join materialized tables. It should be correct, but needs
6001          verification.
6002          TODO: Remove this limitation!
6003       Handle this by disabling materialization strategies:
6004     */
6005     sj_nest->nested_join->sjm.scan_allowed= false;
6006     sj_nest->nested_join->sjm.lookup_allowed= false;
6007     DBUG_VOID_RETURN;
6008   }
6009 
6010   DBUG_PRINT("info",("semijoin_types_allow_materialization: ok, allowed"));
6011 
6012   DBUG_VOID_RETURN;
6013 }
6014 
6015 
6016 /*****************************************************************************
6017   Create JOIN_TABS, make a guess about the table types,
6018   Approximate how many records will be used in each table
6019 *****************************************************************************/
6020 
6021 /**
6022   Returns estimated number of rows that could be fetched by given
6023   access method.
6024 
6025   The function calls the range optimizer to estimate the cost of the
6026   cheapest QUICK_* index access method to scan one or several of the
6027   'keys' using the conditions 'select->cond'. The range optimizer
6028   compares several different types of 'quick select' methods (range
6029   scan, index merge, loose index scan) and selects the cheapest one.
6030 
6031   If the best index access method is cheaper than a table- and an index
6032   scan, then the range optimizer also constructs the corresponding
6033   QUICK_* object and assigns it to select->quick. In most cases this
6034   is the QUICK_* object used at later (optimization and execution)
6035   phases.
6036 
6037   @param thd    Session that runs the query.
6038   @param tab    JOIN_TAB of source table.
6039   @param limit  maximum number of rows to select.
6040 
6041   @note
6042     In case of valid range, a QUICK_SELECT_I object will be constructed and
6043     saved in select->quick.
6044 
6045   @return Estimated number of result rows selected from 'tab'.
6046 
6047   @retval HA_POS_ERROR For derived tables/views or if an error occur.
6048   @retval 0            If impossible query (i.e. certainly no rows will be
6049                        selected.)
6050 */
get_quick_record_count(THD * thd,JOIN_TAB * tab,ha_rows limit)6051 static ha_rows get_quick_record_count(THD *thd, JOIN_TAB *tab, ha_rows limit)
6052 {
6053   DBUG_ENTER("get_quick_record_count");
6054   uchar buff[STACK_BUFF_ALLOC];
6055   if (check_stack_overrun(thd, STACK_MIN_SIZE, buff))
6056     DBUG_RETURN(0);                           // Fatal error flag is set
6057 
6058   TABLE_LIST *const tl= tab->table_ref;
6059 
6060   // Derived tables aren't filled yet, so no stats are available.
6061   if (!tl->uses_materialization())
6062   {
6063     QUICK_SELECT_I *qck;
6064     int error= test_quick_select(thd,
6065                                  tab->const_keys,
6066                                  0,      //empty table_map
6067                                  limit,
6068                                  false,  //don't force quick range
6069                                  ORDER::ORDER_NOT_RELEVANT, tab,
6070                                  tab->join_cond() ? tab->join_cond() :
6071                                  tab->join()->where_cond,
6072                                  &tab->needed_reg, &qck, tab->table()->force_index);
6073     tab->set_quick(qck);
6074 
6075     if (error == 1)
6076       DBUG_RETURN(qck->records);
6077     if (error == -1)
6078     {
6079       tl->table->reginfo.impossible_range=1;
6080       DBUG_RETURN(0);
6081     }
6082     DBUG_PRINT("warning",("Couldn't use record count on const keypart"));
6083   }
6084   else if (tl->materializable_is_const())
6085   {
6086     DBUG_RETURN(tl->derived_unit()->query_result()->estimated_rowcount);
6087   }
6088   DBUG_RETURN(HA_POS_ERROR);
6089 }
6090 
6091 /*
6092   Get estimated record length for semi-join materialization temptable
6093 
6094   SYNOPSIS
6095     get_tmp_table_rec_length()
6096       items  IN subquery's select list.
6097 
6098   DESCRIPTION
6099     Calculate estimated record length for semi-join materialization
6100     temptable. It's an estimate because we don't follow every bit of
6101     create_tmp_table()'s logic. This isn't necessary as the return value of
6102     this function is used only for cost calculations.
6103 
6104   RETURN
6105     Length of the temptable record, in bytes
6106 */
6107 
get_tmp_table_rec_length(List<Item> & items)6108 static uint get_tmp_table_rec_length(List<Item> &items)
6109 {
6110   uint len= 0;
6111   Item *item;
6112   List_iterator<Item> it(items);
6113   while ((item= it++))
6114   {
6115     switch (item->result_type()) {
6116     case REAL_RESULT:
6117       len += sizeof(double);
6118       break;
6119     case INT_RESULT:
6120       if (item->max_length >= (MY_INT32_NUM_DECIMAL_DIGITS - 1))
6121         len += 8;
6122       else
6123         len += 4;
6124       break;
6125     case STRING_RESULT:
6126       /* DATE/TIME and GEOMETRY fields have STRING_RESULT result type.  */
6127       if (item->is_temporal() || item->field_type() == MYSQL_TYPE_GEOMETRY)
6128         len += 8;
6129       else
6130         len += item->max_length;
6131       break;
6132     case DECIMAL_RESULT:
6133       len += 10;
6134       break;
6135     case ROW_RESULT:
6136     default:
6137       assert(0); /* purecov: deadcode */
6138       break;
6139     }
6140   }
6141   return len;
6142 }
6143 
6144 
6145 /**
6146    Writes to the optimizer trace information about dependencies between
6147    tables.
6148    @param trace  optimizer trace
6149    @param join_tabs  all JOIN_TABs of the join
6150    @param table_count how many JOIN_TABs in the 'join_tabs' array
6151 */
trace_table_dependencies(Opt_trace_context * trace,JOIN_TAB * join_tabs,uint table_count)6152 static void trace_table_dependencies(Opt_trace_context * trace,
6153                                      JOIN_TAB *join_tabs,
6154                                      uint table_count)
6155 {
6156   Opt_trace_object trace_wrapper(trace);
6157   Opt_trace_array trace_dep(trace, "table_dependencies");
6158   for (uint i= 0 ; i < table_count ; i++)
6159   {
6160     TABLE_LIST *table_ref= join_tabs[i].table_ref;
6161     Opt_trace_object trace_one_table(trace);
6162     trace_one_table.add_utf8_table(table_ref).
6163       add("row_may_be_null", table_ref->table->is_nullable());
6164     const table_map map= table_ref->map();
6165     assert(map < (1ULL << table_count));
6166     for (uint j= 0; j < table_count; j++)
6167     {
6168       if (map & (1ULL << j))
6169       {
6170         trace_one_table.add("map_bit", j);
6171         break;
6172       }
6173     }
6174     Opt_trace_array depends_on(trace, "depends_on_map_bits");
6175     // RAND_TABLE_BIT may be in join_tabs[i].dependent, so we test all 64 bits
6176     compile_time_assert(sizeof(table_ref->map()) <= 64);
6177     for (uint j= 0; j < 64; j++)
6178     {
6179       if (join_tabs[i].dependent & (1ULL << j))
6180         depends_on.add(j);
6181     }
6182   }
6183 }
6184 
6185 
6186 /**
6187   Add to join_tab[i]->condition() "table.field IS NOT NULL" conditions
6188   we've inferred from ref/eq_ref access performed.
6189 
6190     This function is a part of "Early NULL-values filtering for ref access"
6191     optimization.
6192 
6193     Example of this optimization:
6194     For query SELECT * FROM t1,t2 WHERE t2.key=t1.field @n
6195     and plan " any-access(t1), ref(t2.key=t1.field) " @n
6196     add "t1.field IS NOT NULL" to t1's table condition. @n
6197 
6198     Description of the optimization:
6199 
6200       We look through equalities choosen to perform ref/eq_ref access,
6201       pick equalities that have form "tbl.part_of_key = othertbl.field"
6202       (where othertbl is a non-const table and othertbl.field may be NULL)
6203       and add them to conditions on correspoding tables (othertbl in this
6204       example).
6205 
6206       Exception from that is the case when referred_tab->join != join.
6207       I.e. don't add NOT NULL constraints from any embedded subquery.
6208       Consider this query:
6209       @code
6210       SELECT A.f2 FROM t1 LEFT JOIN t2 A ON A.f2 = f1
6211       WHERE A.f3=(SELECT MIN(f3) FROM  t2 C WHERE A.f4 = C.f4) OR A.f3 IS NULL;
6212       @endcode
6213       Here condition A.f3 IS NOT NULL is going to be added to the WHERE
6214       condition of the embedding query.
6215       Another example:
6216       SELECT * FROM t10, t11 WHERE (t10.a < 10 OR t10.a IS NULL)
6217       AND t11.b <=> t10.b AND (t11.a = (SELECT MAX(a) FROM t12
6218       WHERE t12.b = t10.a ));
6219       Here condition t10.a IS NOT NULL is going to be added.
6220       In both cases addition of NOT NULL condition will erroneously reject
6221       some rows of the result set.
6222       referred_tab->join != join constraint would disallow such additions.
6223 
6224       This optimization doesn't affect the choices that ref, range, or join
6225       optimizer make. This was intentional because this was added after 4.1
6226       was GA.
6227 
6228     Implementation overview
6229       1. update_ref_and_keys() accumulates info about null-rejecting
6230          predicates in in Key_field::null_rejecting
6231       1.1 add_key_part saves these to Key_use.
6232       2. create_ref_for_key copies them to TABLE_REF.
6233       3. add_not_null_conds adds "x IS NOT NULL" to join_tab->m_condition of
6234          appropiate JOIN_TAB members.
6235 */
6236 
add_not_null_conds(JOIN * join)6237 static void add_not_null_conds(JOIN *join)
6238 {
6239   DBUG_ENTER("add_not_null_conds");
6240   ASSERT_BEST_REF_IN_JOIN_ORDER(join);
6241   for (uint i=join->const_tables ; i < join->tables ; i++)
6242   {
6243     JOIN_TAB *const tab= join->best_ref[i];
6244     if ((tab->type() == JT_REF || tab->type() == JT_EQ_REF ||
6245          tab->type() == JT_REF_OR_NULL) &&
6246         !tab->table()->is_nullable())
6247     {
6248       for (uint keypart= 0; keypart < tab->ref().key_parts; keypart++)
6249       {
6250         if (tab->ref().null_rejecting & ((key_part_map)1 << keypart))
6251         {
6252           Item *item= tab->ref().items[keypart];
6253           Item *notnull;
6254           Item *real= item->real_item();
6255           assert(real->type() == Item::FIELD_ITEM);
6256           Item_field *not_null_item= (Item_field*)real;
6257           JOIN_TAB *referred_tab= not_null_item->field->table->reginfo.join_tab;
6258           /*
6259             For UPDATE queries such as:
6260             UPDATE t1 SET t1.f2=(SELECT MAX(t2.f4) FROM t2 WHERE t2.f3=t1.f1);
6261             not_null_item is the t1.f1, but it's referred_tab is 0.
6262           */
6263           if (!referred_tab || referred_tab->join() != join)
6264             continue;
6265           if (!(notnull= new Item_func_isnotnull(not_null_item)))
6266             DBUG_VOID_RETURN;
6267           /*
6268             We need to do full fix_fields() call here in order to have correct
6269             notnull->const_item(). This is needed e.g. by test_quick_select
6270             when it is called from make_join_select after this function is
6271             called.
6272           */
6273           if (notnull->fix_fields(join->thd, &notnull))
6274             DBUG_VOID_RETURN;
6275           DBUG_EXECUTE("where",print_where(notnull,
6276                                            referred_tab->table()->alias,
6277                                            QT_ORDINARY););
6278           referred_tab->and_with_condition(notnull);
6279         }
6280       }
6281     }
6282   }
6283   DBUG_VOID_RETURN;
6284 }
6285 
6286 
6287 /**
6288   Check if given expression only uses fields covered by index #keyno in the
6289   table tbl. The expression can use any fields in any other tables.
6290 
6291   The expression is guaranteed not to be AND or OR - those constructs are
6292   handled outside of this function.
6293 
6294   Restrict some function types from being pushed down to storage engine:
6295   a) Don't push down the triggered conditions. Nested outer joins execution
6296      code may need to evaluate a condition several times (both triggered and
6297      untriggered).
6298   b) Stored functions contain a statement that might start new operations (like
6299      DML statements) from within the storage engine. This does not work against
6300      all SEs.
6301   c) Subqueries might contain nested subqueries and involve more tables.
6302 
6303   @param  item           Expression to check
6304   @param  tbl            The table having the index
6305   @param  keyno          The index number
6306   @param  other_tbls_ok  TRUE <=> Fields of other non-const tables are allowed
6307 
6308   @return false if No, true if Yes
6309 */
6310 
uses_index_fields_only(Item * item,TABLE * tbl,uint keyno,bool other_tbls_ok)6311 bool uses_index_fields_only(Item *item, TABLE *tbl, uint keyno,
6312                             bool other_tbls_ok)
6313 {
6314   // Restrictions b and c.
6315   if (item->has_stored_program() || item->has_subquery())
6316     return false;
6317 
6318   if (item->const_item())
6319     return true;
6320 
6321   const Item::Type item_type= item->type();
6322 
6323   switch (item_type) {
6324   case Item::FUNC_ITEM:
6325     {
6326       Item_func *item_func= (Item_func*)item;
6327       const Item_func::Functype func_type= item_func->functype();
6328 
6329       /*
6330         Restriction a.
6331         TODO: Consider cloning the triggered condition and using the copies
6332         for:
6333         1. push the first copy down, to have most restrictive index condition
6334            possible.
6335         2. Put the second copy into tab->m_condition.
6336       */
6337       if (func_type == Item_func::TRIG_COND_FUNC)
6338         return false;
6339 
6340       /* This is a function, apply condition recursively to arguments */
6341       if (item_func->argument_count() > 0)
6342       {
6343         Item **item_end= (item_func->arguments()) + item_func->argument_count();
6344         for (Item **child= item_func->arguments(); child != item_end; child++)
6345         {
6346           if (!uses_index_fields_only(*child, tbl, keyno, other_tbls_ok))
6347             return FALSE;
6348         }
6349       }
6350       return TRUE;
6351     }
6352   case Item::COND_ITEM:
6353     {
6354       /*
6355         This is a AND/OR condition. Regular AND/OR clauses are handled by
6356         make_cond_for_index() which will chop off the part that can be
6357         checked with index. This code is for handling non-top-level AND/ORs,
6358         e.g. func(x AND y).
6359       */
6360       List_iterator<Item> li(*((Item_cond*)item)->argument_list());
6361       Item *item;
6362       while ((item=li++))
6363       {
6364         if (!uses_index_fields_only(item, tbl, keyno, other_tbls_ok))
6365           return FALSE;
6366       }
6367       return TRUE;
6368     }
6369   case Item::FIELD_ITEM:
6370     {
6371       Item_field *item_field= (Item_field*)item;
6372       if (item_field->field->table != tbl)
6373         return other_tbls_ok;
6374       /*
6375         The below is probably a repetition - the first part checks the
6376         other two, but let's play it safe:
6377       */
6378       return item_field->field->part_of_key.is_set(keyno) &&
6379              item_field->field->type() != MYSQL_TYPE_GEOMETRY &&
6380              item_field->field->type() != MYSQL_TYPE_BLOB;
6381     }
6382   case Item::REF_ITEM:
6383     return uses_index_fields_only(item->real_item(), tbl, keyno,
6384                                   other_tbls_ok);
6385   default:
6386     return FALSE; /* Play it safe, don't push unknown non-const items */
6387   }
6388 }
6389 
6390 
6391 /**
6392   Optimize semi-join nests that could be run with sj-materialization
6393 
6394   @param join           The join to optimize semi-join nests for
6395 
6396   @details
6397     Optimize each of the semi-join nests that can be run with
6398     materialization. For each of the nests, we
6399      - Generate the best join order for this "sub-join" and remember it;
6400      - Remember the sub-join execution cost (it's part of materialization
6401        cost);
6402      - Calculate other costs that will be incurred if we decide
6403        to use materialization strategy for this semi-join nest.
6404 
6405     All obtained information is saved and will be used by the main join
6406     optimization pass.
6407 
6408   @return false if successful, true if error
6409 */
6410 
optimize_semijoin_nests_for_materialization(JOIN * join)6411 static bool optimize_semijoin_nests_for_materialization(JOIN *join)
6412 {
6413   DBUG_ENTER("optimize_semijoin_nests_for_materialization");
6414   List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests);
6415   TABLE_LIST *sj_nest;
6416   Opt_trace_context * const trace= &join->thd->opt_trace;
6417 
6418   while ((sj_nest= sj_list_it++))
6419   {
6420     /* As a precaution, reset pointers that were used in prior execution */
6421     sj_nest->nested_join->sjm.positions= NULL;
6422 
6423     /* Calculate the cost of materialization if materialization is allowed. */
6424     if (sj_nest->nested_join->sj_enabled_strategies &
6425         OPTIMIZER_SWITCH_MATERIALIZATION)
6426     {
6427       /* A semi-join nest should not contain tables marked as const */
6428       assert(!(sj_nest->sj_inner_tables & join->const_table_map));
6429 
6430       Opt_trace_object trace_wrapper(trace);
6431       Opt_trace_object
6432         trace_sjmat(trace, "execution_plan_for_potential_materialization");
6433       Opt_trace_array trace_sjmat_steps(trace, "steps");
6434       /*
6435         Try semijoin materialization if the semijoin is classified as
6436         non-trivially-correlated.
6437       */
6438       if (sj_nest->nested_join->sj_corr_tables)
6439         continue;
6440       /*
6441         Check whether data types allow execution with materialization.
6442       */
6443       semijoin_types_allow_materialization(sj_nest);
6444 
6445       if (!sj_nest->nested_join->sjm.scan_allowed &&
6446           !sj_nest->nested_join->sjm.lookup_allowed)
6447         continue;
6448 
6449       if (Optimize_table_order(join->thd, join, sj_nest).choose_table_order())
6450         DBUG_RETURN(true);
6451       const uint n_tables= my_count_bits(sj_nest->sj_inner_tables);
6452       calculate_materialization_costs(join, sj_nest, n_tables,
6453                                       &sj_nest->nested_join->sjm);
6454       /*
6455         Cost data is in sj_nest->nested_join->sjm. We also need to save the
6456         plan:
6457       */
6458       if (!(sj_nest->nested_join->sjm.positions=
6459             (st_position*)join->thd->alloc(sizeof(st_position)*n_tables)))
6460         DBUG_RETURN(true);
6461       memcpy(static_cast<void*>(sj_nest->nested_join->sjm.positions),
6462              join->best_positions + join->const_tables,
6463              sizeof(st_position) * n_tables);
6464     }
6465   }
6466   DBUG_RETURN(false);
6467 }
6468 
6469 
6470 /*
6471   Check if table's Key_use elements have an eq_ref(outer_tables) candidate
6472 
6473   SYNOPSIS
6474     find_eq_ref_candidate()
6475       tl                Table to be checked
6476       sj_inner_tables   Bitmap of inner tables. eq_ref(inner_table) doesn't
6477                         count.
6478 
6479   DESCRIPTION
6480     Check if table's Key_use elements have an eq_ref(outer_tables) candidate
6481 
6482   TODO
6483     Check again if it is feasible to factor common parts with constant table
6484     search
6485 
6486   RETURN
6487     TRUE  - There exists an eq_ref(outer-tables) candidate
6488     FALSE - Otherwise
6489 */
6490 
find_eq_ref_candidate(TABLE_LIST * tl,table_map sj_inner_tables)6491 static bool find_eq_ref_candidate(TABLE_LIST *tl, table_map sj_inner_tables)
6492 {
6493   Key_use *keyuse= tl->table->reginfo.join_tab->keyuse();
6494 
6495   if (keyuse)
6496   {
6497     while (1) /* For each key */
6498     {
6499       const uint key= keyuse->key;
6500       KEY *const keyinfo= tl->table->key_info + key;
6501       key_part_map bound_parts= 0;
6502       if ((keyinfo->flags & (HA_NOSAME)) == HA_NOSAME)
6503       {
6504         do  /* For all equalities on all key parts */
6505         {
6506           /* Check if this is "t.keypart = expr(outer_tables) */
6507           if (!(keyuse->used_tables & sj_inner_tables) &&
6508               !(keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL))
6509           {
6510             /*
6511               Consider only if the resulting condition does not pass a NULL
6512               value through. Especially needed for a UNIQUE index on NULLable
6513               columns where a duplicate row is possible with NULL values.
6514             */
6515             if (keyuse->null_rejecting || !keyuse->val->maybe_null ||
6516                 !keyinfo->key_part[keyuse->keypart].field->maybe_null())
6517               bound_parts|= (key_part_map)1 << keyuse->keypart;
6518           }
6519           keyuse++;
6520         } while (keyuse->key == key && keyuse->table_ref == tl);
6521 
6522         if (bound_parts == LOWER_BITS(uint, keyinfo->user_defined_key_parts))
6523           return true;
6524         if (keyuse->table_ref != tl)
6525           return false;
6526       }
6527       else
6528       {
6529         do
6530         {
6531           keyuse++;
6532           if (keyuse->table_ref != tl)
6533             return false;
6534         }
6535         while (keyuse->key == key);
6536       }
6537     }
6538   }
6539   return false;
6540 }
6541 
6542 
6543 /**
6544   Pull tables out of semi-join nests based on functional dependencies
6545 
6546   @param join  The join where to do the semi-join table pullout
6547 
6548   @return False if successful, true if error (Out of memory)
6549 
6550   @details
6551     Pull tables out of semi-join nests based on functional dependencies,
6552     ie. if a table is accessed via eq_ref(outer_tables).
6553     The function may be called several times, the caller is responsible
6554     for setting up proper key information that this function acts upon.
6555 
6556     PRECONDITIONS
6557     When this function is called, the join may have several semi-join nests
6558     but it is guaranteed that one semi-join nest does not contain another.
6559     For functionally dependent tables to be pulled out, key information must
6560     have been calculated (see update_ref_and_keys()).
6561 
6562     POSTCONDITIONS
6563      * Tables that were pulled out are removed from the semi-join nest they
6564        belonged to and added to the parent join nest.
6565      * For these tables, the used_tables and not_null_tables fields of
6566        the semi-join nest they belonged to will be adjusted.
6567        The semi-join nest is also marked as correlated, and
6568        sj_corr_tables and sj_depends_on are adjusted if necessary.
6569      * Semi-join nests' sj_inner_tables is set equal to used_tables
6570 
6571     NOTE
6572     Table pullout may make uncorrelated subquery correlated. Consider this
6573     example:
6574 
6575      ... WHERE oe IN (SELECT it1.primary_key WHERE p(it1, it2) ... )
6576 
6577     here table it1 can be pulled out (we have it1.primary_key=oe which gives
6578     us functional dependency). Once it1 is pulled out, all references to it1
6579     from p(it1, it2) become references to outside of the subquery and thus
6580     make the subquery (i.e. its semi-join nest) correlated.
6581     Making the subquery (i.e. its semi-join nest) correlated prevents us from
6582     using Materialization or LooseScan to execute it.
6583 */
6584 
static bool pull_out_semijoin_tables(JOIN *join)
{
  TABLE_LIST *sj_nest;
  DBUG_ENTER("pull_out_semijoin_tables");

  /* The caller must only invoke this when there is at least one sj-nest */
  assert(!join->select_lex->sj_nests.is_empty());

  List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests);
  Opt_trace_context * const trace= &join->thd->opt_trace;
  Opt_trace_object trace_wrapper(trace);
  Opt_trace_array trace_pullout(trace, "pulled_out_semijoin_tables");

  /* Try pulling out tables from each semi-join nest */
  while ((sj_nest= sj_list_it++))
  {
    /* Map of the tables of this nest selected for pullout so far */
    table_map pulled_tables= 0;
    List_iterator<TABLE_LIST> child_li(sj_nest->nested_join->join_list);
    TABLE_LIST *tbl;
    /*
      Calculate set of tables within this semi-join nest that have
      other dependent tables. A child contributes its whole 'dependent'
      map whenever that map intersects the nest's used_tables. Tables in
      dep_tables are never pulled out below.
    */
    table_map dep_tables= 0;
    while ((tbl= child_li++))
    {
      TABLE *const table= tbl->table;
      if (table &&
         (table->reginfo.join_tab->dependent &
          sj_nest->nested_join->used_tables))
        dep_tables|= table->reginfo.join_tab->dependent;
    }
    /*
      Find which tables we can pull out based on key dependency data.
      Note that pulling one table out can allow us to pull out some
      other tables too. Hence the scan over the children is repeated until
      a full pass pulls nothing: each call to find_eq_ref_candidate()
      treats only the not-yet-pulled tables of the nest as inner tables.
    */
    bool pulled_a_table;
    do
    {
      pulled_a_table= FALSE;
      child_li.rewind();
      while ((tbl= child_li++))
      {
        /*
          Consider only base tables (tbl->table set) that are neither
          already pulled out nor part of dep_tables.
        */
        if (tbl->table &&
            !(pulled_tables & tbl->map()) &&
            !(dep_tables & tbl->map()))
        {
          if (find_eq_ref_candidate(tbl,
                                    sj_nest->nested_join->used_tables &
                                    ~pulled_tables))
          {
            pulled_a_table= TRUE;
            pulled_tables |= tbl->map();
            Opt_trace_object(trace).add_utf8_table(tbl).
              add("functionally_dependent", true);
            /*
              Pulling a table out of uncorrelated subquery in general makes
              it correlated. See the NOTE to this function.
            */
            sj_nest->nested_join->sj_corr_tables|= tbl->map();
            sj_nest->nested_join->sj_depends_on|= tbl->map();
          }
        }
      }
    } while (pulled_a_table);

    /* Restart the child iterator; it is reused by the move loop below */
    child_li.rewind();
    /*
      Move the pulled out TABLE_LIST elements to the parents.
      First clear them from the nest's table maps; the move loop below
      identifies pulled-out tables by their absence from used_tables.
    */
    sj_nest->nested_join->used_tables&= ~pulled_tables;
    sj_nest->nested_join->not_null_tables&= ~pulled_tables;

    /* sj_inner_tables is a copy of nested_join->used_tables */
    sj_nest->sj_inner_tables= sj_nest->nested_join->used_tables;

    if (pulled_tables)
    {
      /*
        Destination list: the join list of the nest embedding this sj-nest,
        or the top-level join list when the sj-nest is not embedded.
      */
      List<TABLE_LIST> *upper_join_list= (sj_nest->embedding != NULL) ?
          &sj_nest->embedding->nested_join->join_list :
          &join->select_lex->top_join_list;

      /*
        NOTE(review): presumably switches to the prepared statement arena
        for the rest of this scope, so that the list modifications below
        are allocated persistently - confirm.
      */
      Prepared_stmt_arena_holder ps_arena_holder(join->thd);

      while ((tbl= child_li++))
      {
        /* A base table no longer present in used_tables was pulled out */
        if (tbl->table &&
            !(sj_nest->nested_join->used_tables & tbl->map()))
        {
          /*
            Pull the table up in the same way as simplify_joins() does:
            update join_list and embedding pointers but keep next[_local]
            pointers.
          */
          child_li.remove();

          /* push_back() returning true is reported as an error (OOM) */
          if (upper_join_list->push_back(tbl))
            DBUG_RETURN(TRUE);

          tbl->join_list= upper_join_list;
          tbl->embedding= sj_nest->embedding;
        }
      }

      /* Remove the sj-nest itself if we've removed everything from it */
      if (!sj_nest->nested_join->used_tables)
      {
        List_iterator<TABLE_LIST> li(*upper_join_list);
        /*
          Find the sj_nest in the list. The loop relies on sj_nest being
          an element of upper_join_list; it has no other exit condition.
        */
        while (sj_nest != li++)
        {}
        li.remove();
        /* Also remove it from the list of SJ-nests: */
        sj_list_it.remove();
      }
    }
  }
  DBUG_RETURN(FALSE);
}
6704 
6705 
6706 /**
6707   @defgroup RefOptimizerModule Ref Optimizer
6708 
6709   @{
6710 
6711   This module analyzes all equality predicates to determine the best
6712   independent ref/eq_ref/ref_or_null index access methods.
6713 
6714   The 'ref' optimizer determines the columns (and expressions over them) that
6715   reference columns in other tables via an equality, and analyzes which keys
6716   and key parts can be used for index lookup based on these references. The
6717   main outcomes of the 'ref' optimizer are:
6718 
6719   - A bi-directional graph of all equi-join conditions represented as an
6720     array of Key_use elements. This array is stored in JOIN::keyuse_array in
6721     table, key, keypart order. Each JOIN_TAB::keyuse points to the
6722     first Key_use element with the same table as JOIN_TAB::table.
6723 
6724   - The table dependencies needed by the optimizer to determine what
6725     tables must be before certain table so that they provide the
6726     necessary column bindings for the equality predicates.
6727 
6728   - Computed properties of the equality predicates such as null_rejecting
6729     and the result size of each separate condition.
6730 
6731   Updates in JOIN_TAB:
6732   - JOIN_TAB::keys       Bitmap of all used keys.
6733   - JOIN_TAB::const_keys Bitmap of all keys that may be used with quick_select.
6734   - JOIN_TAB::keyuse     Pointer to possible keys.
6735 */
6736 
6737 /**
6738   A Key_field is a descriptor of a predicate of the form (column <op> val).
6739   Currently 'op' is one of {'=', '<=>', 'IS [NOT] NULL', 'arg1 IN arg2'},
6740   and 'val' can be either another column or an expression (including constants).
6741 
6742   Key_field's are used to analyze columns that may potentially serve as
6743   parts of keys for index lookup. If 'field' is part of an index, then
6744   add_key_part() creates a corresponding Key_use object and inserts it
6745   into the JOIN::keyuse_array which is passed by update_ref_and_keys().
6746 
6747   The structure is used only during analysis of the candidate columns for
6748   index 'ref' access.
6749 */
6750 struct Key_field {
Key_fieldKey_field6751   Key_field(Item_field *item_field, Item *val, uint level,
6752             uint optimize, bool eq_func,
6753             bool null_rejecting, bool *cond_guard, uint sj_pred_no)
6754   : item_field(item_field), val(val), level(level),
6755     optimize(optimize), eq_func(eq_func),
6756     null_rejecting(null_rejecting), cond_guard(cond_guard),
6757     sj_pred_no(sj_pred_no)
6758   {}
6759   Item_field    *item_field;           ///< Item representing the column
6760   Item          *val;                  ///< May be empty if diff constant
6761   uint          level;
6762   uint          optimize;              ///< KEY_OPTIMIZE_*
6763   bool          eq_func;
6764   /**
6765     If true, the condition this struct represents will not be satisfied
6766     when val IS NULL.
6767     @sa Key_use::null_rejecting .
6768   */
6769   bool          null_rejecting;
6770   bool          *cond_guard;                    ///< @sa Key_use::cond_guard
6771   uint          sj_pred_no;                     ///< @sa Key_use::sj_pred_no
6772 };
6773 
/*
  Values in optimize: bit flags kept in Key_field::optimize and tested on
  Key_use::optimize. merge_key_fields() combines them with '&' and '|'.
*/
/*
  Set by add_key_field() for an equality against a NULL item on a column
  that is not real_maybe_null() in a nullable table; see the note on the
  "not exists" optimization in add_key_field()'s documentation.
*/
#define KEY_OPTIMIZE_EXISTS		1
/*
  Set by merge_key_fields() when "field = expression OR field IS NULL" is
  folded into a single descriptor (ref_or_null access).
*/
#define KEY_OPTIMIZE_REF_OR_NULL	2
6777 
6778 /**
6779   Merge new key definitions to old ones, remove those not used in both.
6780 
6781   This is called for OR between different levels.
6782 
6783   To be able to do 'ref_or_null' we merge a comparison of a column
6784   and 'column IS NULL' to one test.  This is useful for sub select queries
  that are internally transformed to something like:
6786 
6787   @code
6788   SELECT * FROM t1 WHERE t1.key=outer_ref_field or t1.key IS NULL
6789   @endcode
6790 
6791   Key_field::null_rejecting is processed as follows: @n
6792   result has null_rejecting=true if it is set for both ORed references.
6793   for example:
6794   -   (t2.key = t1.field OR t2.key  =  t1.field) -> null_rejecting=true
6795   -   (t2.key = t1.field OR t2.key <=> t1.field) -> null_rejecting=false
6796 
6797   @todo
6798     The result of this is that we're missing some 'ref' accesses.
6799     OptimizerTeam: Fix this
6800 */
6801 
static Key_field *
merge_key_fields(Key_field *start, Key_field *new_fields, Key_field *end,
                 uint and_level)
{
  /*
    The descriptors of the two ORed sides are adjacent in one array:
      [start, new_fields)  descriptors of the "old" side
      [new_fields, end)    descriptors of the "new" side
    The merge is done in place in the old range and the returned pointer is
    the new end of that range. and_level is used as the "matched" mark:
    an old descriptor survives only if its level was set to and_level.
  */
  if (start == new_fields)
    return start;				// Old side is empty: nothing can survive
  if (new_fields == end)
    return start;				// No new fields, skip all

  /* One past the last live old descriptor; shrinks as entries are dropped */
  Key_field *first_free=new_fields;

  /* Mark all found fields in old array */
  for (; new_fields != end ; new_fields++)
  {
    Field *const new_field= new_fields->item_field->field;

    for (Key_field *old=start ; old != first_free ; old++)
    {
      Field *const old_field= old->item_field->field;

      /*
        Check that the Field objects are the same, as we may have several
        Item_field objects pointing to the same Field:
      */
      if (old_field == new_field)
      {
        /*
          NOTE: below const_item() call really works as "!used_tables()", i.e.
          it can return FALSE where it is feasible to make it return TRUE.

          The cause is as follows: Some of the tables are already known to be
          const tables (the detection code is in JOIN::make_join_plan(),
          above the update_ref_and_keys() call), but we didn't propagate
          information about this: TABLE::const_table is not set to TRUE, and
          Item::update_used_tables() hasn't been called for each item.
          The result of this is that we're missing some 'ref' accesses.
          TODO: OptimizerTeam: Fix this
        */
        if (!new_fields->val->const_item())
        {
          /*
            If the value matches, we can use the key reference.
            If not, we keep it until we have examined all new values
          */
          if (old->val->eq(new_fields->val, old_field->binary()))
          {
            old->level= and_level;
            /*
              KEY_OPTIMIZE_EXISTS is kept only if both sides have it;
              KEY_OPTIMIZE_REF_OR_NULL is kept if either side has it.
            */
            old->optimize= ((old->optimize & new_fields->optimize &
                             KEY_OPTIMIZE_EXISTS) |
                            ((old->optimize | new_fields->optimize) &
                             KEY_OPTIMIZE_REF_OR_NULL));
            old->null_rejecting= (old->null_rejecting &&
                                  new_fields->null_rejecting);
          }
        }
        /*
          New value is constant: both sides are equalities whose values are
          equal when compared using the column's collation.
        */
        else if (old->eq_func && new_fields->eq_func &&
                 old->val->eq_by_collation(new_fields->val,
                                           old_field->binary(),
                                           old_field->charset()))
        {
          old->level= and_level;
          old->optimize= ((old->optimize & new_fields->optimize &
                           KEY_OPTIMIZE_EXISTS) |
                          ((old->optimize | new_fields->optimize) &
                           KEY_OPTIMIZE_REF_OR_NULL));
          old->null_rejecting= (old->null_rejecting &&
                                new_fields->null_rejecting);
        }
        else if (old->eq_func && new_fields->eq_func &&
                 ((old->val->const_item() && old->val->is_null()) ||
                  new_fields->val->is_null()))
        {
          /* field = expression OR field IS NULL */
          old->level= and_level;
          old->optimize= KEY_OPTIMIZE_REF_OR_NULL;
          /*
            Remember the NOT NULL value unless the value does not depend
            on other tables.
          */
          if (!old->val->used_tables() && old->val->is_null())
            old->val= new_fields->val;
          /* The referred expression can be NULL: */
          old->null_rejecting= 0;
	}
	else
	{
	  /*
	    We are comparing two different const.  In this case we can't
	    use a key-lookup on this so it's better to remove the value
	    and let the range optimizer handle it.
	    Removal shrinks the live range by one and moves its last
	    descriptor into the freed slot; old-- undoes the loop's old++
	    so that the moved descriptor is examined as well.
	  */
	  if (old == --first_free)		// If last item
	    break;
	  *old= *first_free;			// Remove old value
	  old--;				// Retry this value
	}
      }
    }
  }
  /*
    Remove all not used items: any old descriptor whose level was not set
    to and_level above is dropped by the same "move last into slot"
    technique. 'old' is not advanced after a move, so the moved
    descriptor is checked too.
  */
  for (Key_field *old=start ; old != first_free ;)
  {
    if (old->level != and_level)
    {						// Not used in all levels
      if (old == --first_free)
        break;
      *old= *first_free;			// Remove old value
      continue;
    }
    old++;
  }
  return first_free;
}
6915 
6916 
6917 /**
6918   Given a field, return its index in semi-join's select list, or UINT_MAX
6919 
6920   @param item_field Field to be looked up in select list
6921 
6922   @retval =UINT_MAX Field is not from a semijoin-transformed subquery
6923   @retval <UINT_MAX Index in select list of subquery
6924 
6925   @details
6926   Given a field, find its table; then see if the table is within a
6927   semi-join nest and if the field was in select list of the subquery
6928   (if subquery was part of a quantified comparison predicate), or
6929   the field was a result of subquery decorrelation.
6930   If it was, then return the field's index in the select list.
6931   The value is used by LooseScan strategy.
6932 */
6933 
get_semi_join_select_list_index(Item_field * item_field)6934 static uint get_semi_join_select_list_index(Item_field *item_field)
6935 {
6936   TABLE_LIST *emb_sj_nest= item_field->table_ref->embedding;
6937   if (emb_sj_nest && emb_sj_nest->sj_cond())
6938   {
6939     List<Item> &items= emb_sj_nest->nested_join->sj_inner_exprs;
6940     List_iterator<Item> it(items);
6941     for (uint i= 0; i < items.elements; i++)
6942     {
6943       Item *sel_item= it++;
6944       if (sel_item->type() == Item::FIELD_ITEM &&
6945           ((Item_field*)sel_item)->field->eq(item_field->field))
6946         return i;
6947     }
6948   }
6949   return UINT_MAX;
6950 }
6951 
6952 /**
6953    @brief
6954    If EXPLAIN EXTENDED  or if the --safe-updates option is enabled, add a
6955    warning that an index cannot be used for ref access
6956 
6957    @details
6958    If EXPLAIN EXTENDED or if the --safe-updates option is enabled, add a
6959    warning for each index that cannot be used for ref access due to either type
6960    conversion or different collations on the field used for comparison
6961 
6962    Example type conversion (char compared to int):
6963 
6964    CREATE TABLE t1 (url char(1) PRIMARY KEY);
6965    SELECT * FROM t1 WHERE url=1;
6966 
6967    Example different collations (danish vs german2):
6968 
6969    CREATE TABLE t1 (url char(1) PRIMARY KEY) collate latin1_danish_ci;
6970    SELECT * FROM t1 WHERE url='1' collate latin1_german2_ci;
6971 
6972    @param thd                Thread for the connection that submitted the query
   @param field              Field used in comparison
   @param cant_use_index     Indexes that cannot be used for lookup
6975  */
6976 static void
warn_index_not_applicable(THD * thd,const Field * field,const key_map cant_use_index)6977 warn_index_not_applicable(THD *thd, const Field *field,
6978                           const key_map cant_use_index)
6979 {
6980   if (thd->lex->describe ||
6981       thd->variables.option_bits & OPTION_SAFE_UPDATES)
6982     for (uint j=0 ; j < field->table->s->keys ; j++)
6983       if (cant_use_index.is_set(j))
6984         push_warning_printf(thd,
6985                             Sql_condition::SL_WARNING,
6986                             ER_WARN_INDEX_NOT_APPLICABLE,
6987                             ER(ER_WARN_INDEX_NOT_APPLICABLE),
6988                             "ref",
6989                             field->table->key_info[j].name,
6990                             field->field_name);
6991 }
6992 
6993 /**
6994   Add a possible key to array of possible keys if it's usable as a key
6995 
  @param[in,out] key_fields Used as an input parameter in the sense that it is
  a pointer to a pointer to a memory area where an array of Key_field objects
  will be stored. It is used as an out parameter in the sense that the pointer
  will be updated to point beyond the last Key_field written.
7000 
7001   @param and_level       And level, to be stored in Key_field
7002   @param cond            Condition predicate
  @param item_field      Field used in comparison
7004   @param eq_func         True if we used =, <=> or IS NULL
7005   @param value           Array of values used for comparison with field
7006   @param num_values      Number of elements in the array of values
7007   @param usable_tables   Tables which can be used for key optimization
7008   @param sargables       IN/OUT Array of found sargable candidates. Will be
7009                          ignored in case eq_func is true.
7010 
7011   @note
7012     If we are doing a NOT NULL comparison on a NOT NULL field in a outer join
7013     table, we store this to be able to do not exists optimization later.
7014 
7015   @return
7016     *key_fields is incremented if we stored a key in the array
7017 */
7018 
static void
add_key_field(Key_field **key_fields, uint and_level, Item_func *cond,
              Item_field *item_field, bool eq_func, Item **value,
              uint num_values, table_map usable_tables,
              SARGABLE_PARAM **sargables)
{
  /* sargables may only be NULL for equality-like predicates */
  assert(eq_func || sargables);

  Field *const field= item_field->field;
  TABLE_LIST *const tl= item_field->table_ref;

  if (tl->table->reginfo.join_tab == NULL)
  {
    /*
       Due to a bug in IN-to-EXISTS (grep for real_item() in item_subselect.cc
       for more info), an index over a field from an outer query might be
       considered here, which is incorrect. Their query has been fully
       optimized already so their reginfo.join_tab is NULL and we reject them.
    */
    return;
  }

  DBUG_PRINT("info", ("add_key_field for field %s", field->field_name));
  /* KEY_OPTIMIZE_* flags to store in the Key_field created at the end */
  uint exists_optimize= 0;
  /*
    For a table that uses materialization, whose derived keys are not yet
    ready and which has not been created, pass the field and values to
    update_derived_keys(). A true return makes us give up on this predicate.
    NOTE(review): a true return presumably signals an error - confirm.
  */
  if (!tl->derived_keys_ready && tl->uses_materialization() &&
      !tl->table->is_created() &&
      tl->update_derived_keys(field, value, num_values))
    return;
  if (!(field->flags & PART_KEY_FLAG))
  {
    /*
      The field is not flagged as part of a key. It is still recorded
      (with KEY_OPTIMIZE_EXISTS) when all of these hold: the predicate is
      equality-like, the value is a NULL item, the table is nullable, and
      the column is not real_maybe_null().
    */
    // Don't remove column IS NULL on a LEFT JOIN table
    if (!eq_func || (*value)->type() != Item::NULL_ITEM ||
        !tl->table->is_nullable() || field->real_maybe_null())
      return;					// Not a key. Skip it
    exists_optimize= KEY_OPTIMIZE_EXISTS;
    assert(num_values == 1);
  }
  else
  {
    /*
      Collect the tables referenced by all values. The predicate is
      optimizable if at least one value references neither this table
      nor RAND_TABLE_BIT.
    */
    table_map used_tables= 0;
    bool optimizable= false;
    for (uint i=0; i<num_values; i++)
    {
      used_tables|=(value[i])->used_tables();
      if (!((value[i])->used_tables() & (tl->map() | RAND_TABLE_BIT)))
        optimizable= true;
    }
    if (!optimizable)
      return;
    if (!(usable_tables & tl->map()))
    {
      /*
        The table is not among the tables usable for key optimization;
        only the KEY_OPTIMIZE_EXISTS case (same conditions as above) is
        recorded.
      */
      if (!eq_func || (*value)->type() != Item::NULL_ITEM ||
          !tl->table->is_nullable() || field->real_maybe_null())
        return; // Can't use left join optimize
      exists_optimize= KEY_OPTIMIZE_EXISTS;
    }
    else
    {
      JOIN_TAB *stat= tl->table->reginfo.join_tab;
      /*
        Candidate keys: field->key_start restricted to the keys in use for
        this query.
        NOTE(review): key_start presumably holds the keys in which this
        field is the first key part - confirm.
      */
      key_map possible_keys=field->key_start;
      possible_keys.intersect(tl->table->keys_in_use_for_query);
      stat[0].keys().merge(possible_keys);             // Add possible keys

      /*
        Save the following cases:
        Field op constant
        Field LIKE constant where constant doesn't start with a wildcard
        Field = field2 where field2 is in a different table
        Field op formula
        Field IS NULL
        Field IS NOT NULL
        Field BETWEEN ...
        Field IN ...
      */
      stat[0].key_dependent|=used_tables;

      /* is_const stays true only if every value is const_item() */
      bool is_const= true;
      for (uint i=0; i<num_values; i++)
      {
        if (!(is_const&= value[i]->const_item()))
          break;
      }
      if (is_const)
        stat[0].const_keys.merge(possible_keys);
      else if (!eq_func)
      {
        /*
          Save info to be able check whether this predicate can be
          considered as sargable for range analysis after reading const tables.
          We do not save info about equalities as update_const_equal_items
          will take care of updating info on keys from sargable equalities.
        */
        assert(sargables);
        /* The sargables array grows downwards: claim the next slot */
        (*sargables)--;
        /*
          The sargables and key_fields arrays share the same memory
          buffer, and grow from opposite directions, so make sure they
          don't cross.
        */
        assert(*sargables > *reinterpret_cast<SARGABLE_PARAM**>(key_fields));
        (*sargables)->field= field;
        (*sargables)->arg_value= value;
        (*sargables)->num_values= num_values;
      }
      /*
        Only equality-like predicates continue past this point; for the
        others the key bitmaps (and possibly a sargable entry) recorded
        above are all that is kept.
        eq_func is NEVER true when num_values > 1, so only value[0] is
        examined below.
       */
      if (!eq_func)
        return;

      /*
        Check if the field and value are comparable in the index.
        We can't always use indexes when comparing a string index to a
        number. cmp_type() is checked to allow compare of dates to numbers.
        @todo: This code is almost identical to comparable_in_index()
        in opt_range.cc. Consider replacing the checks below with a
        function call to comparable_in_index()
      */
      if (field->result_type() == STRING_RESULT)
      {
        if ((*value)->result_type() != STRING_RESULT)
        {
          if (field->cmp_type() != (*value)->result_type())
          {
            warn_index_not_applicable(stat->join()->thd, field, possible_keys);
            return;
          }
        }
        else
        {
          /*
            Can't optimize datetime_column=indexed_varchar_column,
            also can't use indexes if the effective collation
            of the operation differ from the field collation.
            IndexedTimeComparedToDate: can't optimize
            'indexed_time = temporal_expr_with_date_part' because:
            - without index, a TIME column with value '48:00:00' is equal to a
            DATETIME column with value 'CURDATE() + 2 days'
            - with ref access into the TIME column, CURDATE() + 2 days becomes
            "00:00:00" (Field_timef::store_internal() simply extracts the time
            part from the datetime) which is a lookup key which does not match
            "48:00:00"; so ref access is not able to give the same result
            as without index, so is disabled.
            On the other hand, we can optimize indexed_datetime = time
            because Field_temporal_with_date::store_time() will convert
            48:00:00 to CURDATE() + 2 days which is the correct lookup key.
          */
          if ((!field->is_temporal() && value[0]->is_temporal()) ||
              (field->cmp_type() == STRING_RESULT &&
               field->charset() != cond->compare_collation()) ||
              field_time_cmp_date(field, value[0]))
          {
            warn_index_not_applicable(stat->join()->thd, field, possible_keys);
            return;
          }
        }
      }

      /*
        We can't use indexes when comparing to a JSON value. For example,
        the string '{}' should compare equal to the JSON string "{}". If
        we use a string index to compare the two strings, we will be
        comparing '{}' and '"{}"', which don't compare equal.
      */
      if (value[0]->result_type() == STRING_RESULT &&
          value[0]->field_type() == MYSQL_TYPE_JSON)
      {
        warn_index_not_applicable(stat->join()->thd, field, possible_keys);
        return;
      }
    }
  }
  /*
    For the moment eq_func is always true. This slot is reserved for future
    extensions where we want to remembers other things than just eq comparisons
  */
  assert(eq_func);
  /*
    If the condition has form "tbl.keypart = othertbl.field" and
    othertbl.field can be NULL, there will be no matches if othertbl.field
    has NULL value.
    We use null_rejecting in add_not_null_conds() to add
    'othertbl.field IS NOT NULL' to tab->m_condition, if this is not an outer
    join. We also use it to shortcut reading "tbl" when othertbl.field is
    found to be a NULL value (in join_read_always_key() and BKA).

    Accordingly, null_rejecting is set only when the predicate is '=' or a
    multiple equality and the value's real item is a nullable field.
  */
  Item *const real= (*value)->real_item();
  const bool null_rejecting=
      ((cond->functype() == Item_func::EQ_FUNC) ||
       (cond->functype() == Item_func::MULT_EQUAL_FUNC)) &&
      (real->type() == Item::FIELD_ITEM) &&
      ((Item_field*)real)->field->maybe_null();

  /*
    Store possible eq field: construct the descriptor in place in the
    caller's buffer, then advance the caller's write pointer past it.
  */
  new (*key_fields)
    Key_field(item_field, *value, and_level, exists_optimize, eq_func,
              null_rejecting, NULL,
              get_semi_join_select_list_index(item_field));
  (*key_fields)++;
  /*
    The sargables and key_fields arrays share the same memory buffer,
    and grow from opposite directions, so make sure they don't
    cross. But if sargables was NULL, eq_func had to be true and we
    don't write any sargables.
  */
  assert(sargables == NULL ||
         *key_fields < *reinterpret_cast<Key_field**>(sargables));
}
7227 
7228 /**
7229   Add possible keys to array of possible keys originated from a simple
7230   predicate.
7231 
7232     @param  key_fields     Pointer to add key, if usable
7233     @param  and_level      And level, to be stored in Key_field
7234     @param  cond           Condition predicate
7235     @param  field_item     Field used in comparision
7236     @param  eq_func        True if we used =, <=> or IS NULL
7237     @param  val            Value used for comparison with field
7238                            Is NULL for BETWEEN and IN
7239     @param  usable_tables  Tables which can be used for key optimization
7240     @param  sargables      IN/OUT Array of found sargable candidates
7241 
7242   @note
7243     If field items f1 and f2 belong to the same multiple equality and
7244     a key is added for f1, the the same key is added for f2.
7245 
7246   @returns
7247     *key_fields is incremented if we stored a key in the array
7248 */
7249 
7250 static void
add_key_equal_fields(Key_field ** key_fields,uint and_level,Item_func * cond,Item_field * field_item,bool eq_func,Item ** val,uint num_values,table_map usable_tables,SARGABLE_PARAM ** sargables)7251 add_key_equal_fields(Key_field **key_fields, uint and_level,
7252                      Item_func *cond, Item_field *field_item,
7253                      bool eq_func, Item **val,
7254                      uint num_values, table_map usable_tables,
7255                      SARGABLE_PARAM **sargables)
7256 {
7257   DBUG_ENTER("add_key_equal_fields");
7258 
7259   add_key_field(key_fields, and_level, cond, field_item,
7260                 eq_func, val, num_values, usable_tables, sargables);
7261   Item_equal *item_equal= field_item->item_equal;
7262   if (item_equal)
7263   {
7264     /*
7265       Add to the set of possible key values every substitution of
7266       the field for an equal field included into item_equal
7267     */
7268     Item_equal_iterator it(*item_equal);
7269     Item_field *item;
7270     while ((item= it++))
7271     {
7272       if (!field_item->field->eq(item->field))
7273         add_key_field(key_fields, and_level, cond, item,
7274                       eq_func, val, num_values, usable_tables,
7275                       sargables);
7276     }
7277   }
7278   DBUG_VOID_RETURN;
7279 }
7280 
7281 
7282 /**
7283   Check if an expression is a non-outer field.
7284 
7285   Checks if an expression is a field and belongs to the current select.
7286 
7287   @param   field  Item expression to check
7288 
7289   @return boolean
7290      @retval TRUE   the expression is a local field
7291      @retval FALSE  it's something else
7292 */
7293 
7294 static bool
is_local_field(Item * field)7295 is_local_field (Item *field)
7296 {
7297   return field->real_item()->type() == Item::FIELD_ITEM &&
7298     !(field->used_tables() & OUTER_REF_TABLE_BIT) &&
7299     !down_cast<Item_ident *>(field)->depended_from &&
7300     !down_cast<Item_ident *>(field->real_item())->depended_from;
7301 }
7302 
7303 
7304 /**
7305   Check if a row constructor expression is over columns in the same query block.
7306 
7307   @param item_row Row expression to check.
7308 
7309   @return boolean
7310   @retval true  The expression is a local column reference.
7311   @retval false It's something else.
7312 */
is_row_of_local_columns(Item_row * item_row)7313 static bool is_row_of_local_columns(Item_row *item_row)
7314 {
7315   for (uint i= 0; i < item_row->cols(); ++i)
7316     if (!is_local_field(item_row->element_index(i)))
7317       return false;
7318   return true;
7319 }
7320 
7321 
7322 /**
7323    The guts of the ref optimizer. This function, along with the other
7324    add_key_* functions, make up a recursive procedure that analyzes a
7325    condition expression (a tree of AND and OR predicates) and does
7326    many things.
7327 
7328    @param join The query block involving the condition.
7329 
7330    @param key_fields[in,out] Start of memory buffer, see below.
7331    @param and_level[in, out] Current 'and level', see below.
7332    @param cond The conditional expression to analyze.
7333    @param usable_tables Tables not in this bitmap will not be examined.
7334    @param sargables [in,out] End of memory buffer, see below.
7335 
7336    This documentation is the result of reverse engineering and may
7337    therefore not capture the full gist of the procedure, but it is
7338    known to do the following:
7339 
7340    - Populate a raw memory buffer from two directions at the same time. An
7341      'array' of Key_field objects fill the buffer from low to high addresses
7342      whilst an 'array' of SARGABLE_PARAM's fills the buffer from high to low
7343      addresses. At the first call to this function, it is assumed that
7344      key_fields points to the beginning of the buffer and sargables point to the
7345      end (except for a poor-mans 'null element' at the very end).
7346 
7347    - Update a number of properties in the JOIN_TAB's that can be used
7348      to find search keys (sargables).
7349 
7350      - JOIN_TAB::keys
7351      - JOIN_TAB::key_dependent
7352      - JOIN_TAB::const_keys (dictates if the range optimizer will be run
7353        later.)
7354 
7355    The Key_field objects are marked with something called an 'and_level', which
7356    does @b not correspond to their nesting depth within the expression tree. It
7357    is rather a tag to group conjunctions together. For instance, in the
7358    conditional expression
7359 
7360    @code
7361      a = 0 AND b = 0
7362    @endcode
7363 
7364    two Key_field's are produced, both having an and_level of 0.
7365 
7366    In an expression such as
7367 
7368    @code
7369      a = 0 AND b = 0 OR a = 1
7370    @endcode
7371 
7372    three Key_field's are produced, the first two corresponding to 'a = 0' and
7373    'b = 0', respectively, both with and_level 0. The third one corresponds to
7374    'a = 1' and has an and_level of 1.
7375 
7376    A separate function, merge_key_fields() performs ref access validation on
7377    the Key_field array on the recursice ascent. If some Key_field's cannot be
7378    used for ref access, the key_fields pointer is rolled back. All other
7379    modifications to the query plan remain.
7380 */
7381 static void
add_key_fields(JOIN * join,Key_field ** key_fields,uint * and_level,Item * cond,table_map usable_tables,SARGABLE_PARAM ** sargables)7382 add_key_fields(JOIN *join, Key_field **key_fields, uint *and_level,
7383                Item *cond, table_map usable_tables,
7384                SARGABLE_PARAM **sargables)
7385 {
7386   DBUG_ENTER("add_key_fields");
7387   if (cond->type() == Item_func::COND_ITEM)
7388   {
7389     List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
7390     Key_field *org_key_fields= *key_fields;
7391 
7392     if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
7393     {
7394       Item *item;
7395       while ((item=li++))
7396         add_key_fields(join, key_fields, and_level, item, usable_tables,
7397                        sargables);
7398       for (; org_key_fields != *key_fields ; org_key_fields++)
7399         org_key_fields->level= *and_level;
7400     }
7401     else
7402     {
7403       (*and_level)++;
7404       add_key_fields(join, key_fields, and_level, li++, usable_tables,
7405                      sargables);
7406       Item *item;
7407       while ((item=li++))
7408       {
7409         Key_field *start_key_fields= *key_fields;
7410         (*and_level)++;
7411         add_key_fields(join, key_fields, and_level, item, usable_tables,
7412                        sargables);
7413         *key_fields=merge_key_fields(org_key_fields,start_key_fields,
7414                                      *key_fields,++(*and_level));
7415       }
7416     }
7417     DBUG_VOID_RETURN;
7418   }
7419 
7420   /*
7421     Subquery optimization: Conditions that are pushed down into subqueries
7422     are wrapped into Item_func_trig_cond. We process the wrapped condition
7423     but need to set cond_guard for Key_use elements generated from it.
7424   */
7425   {
7426     if (cond->type() == Item::FUNC_ITEM &&
7427         ((Item_func*)cond)->functype() == Item_func::TRIG_COND_FUNC)
7428     {
7429       Item *cond_arg= ((Item_func*)cond)->arguments()[0];
7430       if (!join->group_list && !join->order &&
7431           join->unit->item &&
7432           join->unit->item->substype() == Item_subselect::IN_SUBS &&
7433           !join->unit->is_union())
7434       {
7435         Key_field *save= *key_fields;
7436         add_key_fields(join, key_fields, and_level, cond_arg, usable_tables,
7437                        sargables);
7438         // Indicate that this ref access candidate is for subquery lookup:
7439         for (; save != *key_fields; save++)
7440           save->cond_guard= ((Item_func_trig_cond*)cond)->get_trig_var();
7441       }
7442       DBUG_VOID_RETURN;
7443     }
7444   }
7445 
7446   /* If item is of type 'field op field/constant' add it to key_fields */
7447   if (cond->type() != Item::FUNC_ITEM)
7448     DBUG_VOID_RETURN;
7449   Item_func *cond_func= (Item_func*) cond;
7450   switch (cond_func->select_optimize()) {
7451   case Item_func::OPTIMIZE_NONE:
7452     break;
7453   case Item_func::OPTIMIZE_KEY:
7454   {
7455     Item **values;
7456     /*
7457       Build list of possible keys for 'a BETWEEN low AND high'.
7458       It is handled similar to the equivalent condition
7459       'a >= low AND a <= high':
7460     */
7461     if (cond_func->functype() == Item_func::BETWEEN)
7462     {
7463       Item_field *field_item;
7464       bool equal_func= FALSE;
7465       uint num_values= 2;
7466       values= cond_func->arguments();
7467 
7468       bool binary_cmp= (values[0]->real_item()->type() == Item::FIELD_ITEM)
7469             ? ((Item_field*)values[0]->real_item())->field->binary()
7470             : TRUE;
7471 
7472       /*
7473         Additional optimization: If 'low = high':
7474         Handle as if the condition was "t.key = low".
7475       */
7476       if (!((Item_func_between*)cond_func)->negated &&
7477           values[1]->eq(values[2], binary_cmp))
7478       {
7479         equal_func= TRUE;
7480         num_values= 1;
7481       }
7482 
7483       /*
7484         Append keys for 'field <cmp> value[]' if the
7485         condition is of the form::
7486         '<field> BETWEEN value[1] AND value[2]'
7487       */
7488       if (is_local_field (values[0]))
7489       {
7490         field_item= (Item_field *) (values[0]->real_item());
7491         add_key_equal_fields(key_fields, *and_level, cond_func,
7492                              field_item, equal_func, &values[1],
7493                              num_values, usable_tables, sargables);
7494       }
7495       /*
7496         Append keys for 'value[0] <cmp> field' if the
7497         condition is of the form:
7498         'value[0] BETWEEN field1 AND field2'
7499       */
7500       for (uint i= 1; i <= num_values; i++)
7501       {
7502         if (is_local_field (values[i]))
7503         {
7504           field_item= (Item_field *) (values[i]->real_item());
7505           add_key_equal_fields(key_fields, *and_level, cond_func,
7506                                field_item, equal_func, values,
7507                                1, usable_tables, sargables);
7508         }
7509       }
7510     } // if ( ... Item_func::BETWEEN)
7511 
7512     // The predicate is IN or !=
7513     else if (is_local_field (cond_func->key_item()) &&
7514             !(cond_func->used_tables() & OUTER_REF_TABLE_BIT))
7515     {
7516       values= cond_func->arguments()+1;
7517       if (cond_func->functype() == Item_func::NE_FUNC &&
7518         is_local_field (cond_func->arguments()[1]))
7519         values--;
7520       assert(cond_func->functype() != Item_func::IN_FUNC ||
7521              cond_func->argument_count() != 2);
7522       add_key_equal_fields(key_fields, *and_level, cond_func,
7523                            (Item_field*) (cond_func->key_item()->real_item()),
7524                            0, values,
7525                            cond_func->argument_count()-1,
7526                            usable_tables, sargables);
7527     }
7528     else if (cond_func->functype() == Item_func::IN_FUNC &&
7529              cond_func->key_item()->type() == Item::ROW_ITEM)
7530     {
7531       /*
7532         The condition is (column1, column2, ... ) IN ((const1_1, const1_2), ...)
7533         and there is an index on (column1, column2, ...)
7534 
7535         The code below makes sure that the row constructor on the lhs indeed
7536         contains only column references before calling add_key_field on them.
7537 
7538         We can't do a ref access on IN, yet here we are. Why? We need
7539         to run add_key_field() only because it verifies that there are
7540         only constant expressions in the rows on the IN's rhs, see
7541         comment above the call to add_key_field() below.
7542 
7543         Actually, We could in theory do a ref access if the IN rhs
7544         contained just a single row, but there is a hack in the parser
7545         causing such IN predicates be parsed as row equalities.
7546       */
7547       Item_row *lhs_row= static_cast<Item_row*>(cond_func->key_item());
7548       if (is_row_of_local_columns(lhs_row))
7549       {
7550         for (uint i= 0; i < lhs_row->cols(); ++i)
7551         {
7552           Item *const lhs_item= lhs_row->element_index(i)->real_item();
7553           assert(lhs_item->type() == Item::FIELD_ITEM);
7554           Item_field *const lhs_column= static_cast<Item_field*>(lhs_item);
7555           // j goes from 1 since arguments()[0] is the lhs of IN.
7556           for (uint j= 1; j < cond_func->argument_count(); ++j)
7557           {
7558             // Here we pick out the i:th column in the j:th row.
7559             Item *rhs_item= cond_func->arguments()[j];
7560             assert(rhs_item->type() == Item::ROW_ITEM);
7561             Item_row *rhs_row= static_cast<Item_row*>(rhs_item);
7562             assert(rhs_row->cols() == lhs_row->cols());
7563             Item **rhs_expr_ptr= rhs_row->addr(i);
7564             /*
7565               add_key_field() will write a Key_field on each call
7566               here, but we don't care, it will never be used. We only
7567               call it for the side effect: update JOIN_TAB::const_keys
7568               so the range optimizer can be invoked. We pass a
7569               scrap buffer and pointer here.
7570             */
7571             Key_field scrap_key_field= **key_fields;
7572             Key_field *scrap_key_field_ptr= &scrap_key_field;
7573             add_key_field(&scrap_key_field_ptr,
7574                           *and_level,
7575                           cond_func,
7576                           lhs_column,
7577                           true, // eq_func
7578                           rhs_expr_ptr,
7579                           1, // Number of expressions: one
7580                           usable_tables,
7581                           NULL); // sargables
7582             // The pointer is not supposed to increase by more than one.
7583             assert(scrap_key_field_ptr <= &scrap_key_field + 1);
7584           }
7585         }
7586       }
7587     }
7588     break;
7589   }
7590   case Item_func::OPTIMIZE_OP:
7591   {
7592     bool equal_func=(cond_func->functype() == Item_func::EQ_FUNC ||
7593 		     cond_func->functype() == Item_func::EQUAL_FUNC);
7594 
7595     if (is_local_field (cond_func->arguments()[0]))
7596     {
7597       add_key_equal_fields(key_fields, *and_level, cond_func,
7598 	                (Item_field*) (cond_func->arguments()[0])->real_item(),
7599 		           equal_func,
7600                            cond_func->arguments()+1, 1, usable_tables,
7601                            sargables);
7602     }
7603     if (is_local_field (cond_func->arguments()[1]) &&
7604 	cond_func->functype() != Item_func::LIKE_FUNC)
7605     {
7606       add_key_equal_fields(key_fields, *and_level, cond_func,
7607                        (Item_field*) (cond_func->arguments()[1])->real_item(),
7608 		           equal_func,
7609                            cond_func->arguments(),1,usable_tables,
7610                            sargables);
7611     }
7612     break;
7613   }
7614   case Item_func::OPTIMIZE_NULL:
7615     /* column_name IS [NOT] NULL */
7616     if (is_local_field (cond_func->arguments()[0]) &&
7617 	!(cond_func->used_tables() & OUTER_REF_TABLE_BIT))
7618     {
7619       Item *tmp=new Item_null;
7620       if (unlikely(!tmp))                       // Should never be true
7621         DBUG_VOID_RETURN;
7622       add_key_equal_fields(key_fields, *and_level, cond_func,
7623 		    (Item_field*) (cond_func->arguments()[0])->real_item(),
7624 		    cond_func->functype() == Item_func::ISNULL_FUNC,
7625 			   &tmp, 1, usable_tables, sargables);
7626     }
7627     break;
7628   case Item_func::OPTIMIZE_EQUAL:
7629     Item_equal *item_equal= (Item_equal *) cond;
7630     Item *const_item= item_equal->get_const();
7631     if (const_item)
7632     {
7633       /*
7634         For each field field1 from item_equal consider the equality
7635         field1=const_item as a condition allowing an index access of the table
7636         with field1 by the keys value of field1.
7637       */
7638       Item_equal_iterator it(*item_equal);
7639       Item_field *item;
7640       while ((item= it++))
7641       {
7642         add_key_field(key_fields, *and_level, cond_func, item,
7643                       TRUE, &const_item, 1, usable_tables, sargables);
7644       }
7645     }
7646     else
7647     {
7648       /*
7649         Consider all pairs of different fields included into item_equal.
7650         For each of them (field1, field1) consider the equality
7651         field1=field2 as a condition allowing an index access of the table
7652         with field1 by the keys value of field2.
7653       */
7654       Item_equal_iterator outer_it(*item_equal);
7655       Item_equal_iterator inner_it(*item_equal);
7656       Item_field *outer;
7657       while ((outer= outer_it++))
7658       {
7659         Item_field *inner;
7660         while ((inner= inner_it++))
7661         {
7662           if (!outer->field->eq(inner->field))
7663             add_key_field(key_fields, *and_level, cond_func, outer,
7664                           true, (Item **) &inner, 1, usable_tables,
7665                           sargables);
7666         }
7667         inner_it.rewind();
7668       }
7669     }
7670     break;
7671   }
7672   DBUG_VOID_RETURN;
7673 }
7674 
7675 
7676 /*
7677   Add all keys with uses 'field' for some keypart
7678   If field->and_level != and_level then only mark key_part as const_part
7679 
7680   RETURN
7681    0 - OK
7682    1 - Out of memory.
7683 */
7684 
7685 static bool
add_key_part(Key_use_array * keyuse_array,Key_field * key_field)7686 add_key_part(Key_use_array *keyuse_array, Key_field *key_field)
7687 {
7688   if (key_field->eq_func && !(key_field->optimize & KEY_OPTIMIZE_EXISTS))
7689   {
7690     Field *const field= key_field->item_field->field;
7691     TABLE_LIST *const tl= key_field->item_field->table_ref;
7692     TABLE *const table= tl->table;
7693 
7694     for (uint key=0 ; key < table->s->keys ; key++)
7695     {
7696       if (!(table->keys_in_use_for_query.is_set(key)))
7697 	continue;
7698       if (table->key_info[key].flags & (HA_FULLTEXT | HA_SPATIAL))
7699 	continue;    // ToDo: ft-keys in non-ft queries.   SerG
7700 
7701       uint key_parts= actual_key_parts(&table->key_info[key]);
7702       for (uint part=0 ; part <  key_parts ; part++)
7703       {
7704 	if (field->eq(table->key_info[key].key_part[part].field))
7705 	{
7706           const Key_use keyuse(tl,
7707                                key_field->val,
7708                                key_field->val->used_tables(),
7709                                key,
7710                                part,
7711                                key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL,
7712                                (key_part_map) 1 << part,
7713                                ~(ha_rows) 0, // will be set in optimize_keyuse
7714                                key_field->null_rejecting,
7715                                key_field->cond_guard,
7716                                key_field->sj_pred_no);
7717           if (keyuse_array->push_back(keyuse))
7718             return true;              /* purecov: inspected */
7719 	}
7720       }
7721     }
7722   }
7723   return false;
7724 }
7725 
7726 
7727 /**
7728    Function parses WHERE condition and add key_use for FT index
7729    into key_use array if suitable MATCH function is found.
7730    Condition should be a set of AND expression, OR is not supported.
7731    MATCH function should be a part of simple expression.
7732    Simple expression is MATCH only function or MATCH is a part of
7733    comparison expression ('>=' or '>' operations are supported).
7734    It also sets FT_HINTS values(op_type, op_value).
7735 
7736    @param keyuse_array      Key_use array
7737    @param stat              JOIN_TAB structure
7738    @param cond              WHERE condition
7739    @param usable_tables     usable tables
7740    @param simple_match_expr true if this is the first call false otherwise.
7741                             if MATCH function is found at first call it means
7742                             that MATCH is simple expression, otherwise, in case
7743                             of AND/OR condition this parameter will be false.
7744 
7745    @retval
7746    true if FT key was added to Key_use array
7747    @retval
7748    false if no key was added to Key_use array
7749 
7750 */
7751 
7752 static bool
add_ft_keys(Key_use_array * keyuse_array,JOIN_TAB * stat,Item * cond,table_map usable_tables,bool simple_match_expr)7753 add_ft_keys(Key_use_array *keyuse_array,
7754             JOIN_TAB *stat,Item *cond,table_map usable_tables,
7755             bool simple_match_expr)
7756 {
7757   Item_func_match *cond_func=NULL;
7758 
7759   if (!cond)
7760     return FALSE;
7761 
7762   if (cond->type() == Item::FUNC_ITEM)
7763   {
7764     Item_func *func=(Item_func *)cond;
7765     Item_func::Functype functype=  func->functype();
7766     enum ft_operation op_type= FT_OP_NO;
7767     double op_value= 0.0;
7768     if (functype == Item_func::FT_FUNC)
7769     {
7770       cond_func= ((Item_func_match *) cond)->get_master();
7771       cond_func->set_hints_op(op_type, op_value);
7772     }
7773     else if (func->arg_count == 2)
7774     {
7775       Item *arg0=(func->arguments()[0]),
7776            *arg1=(func->arguments()[1]);
7777       if (arg1->const_item() &&
7778            arg0->type() == Item::FUNC_ITEM &&
7779            ((Item_func *) arg0)->functype() == Item_func::FT_FUNC &&
7780           ((functype == Item_func::GE_FUNC &&
7781             (op_value= arg1->val_real()) > 0) ||
7782            (functype == Item_func::GT_FUNC &&
7783             (op_value= arg1->val_real()) >=0)))
7784       {
7785         cond_func= ((Item_func_match *) arg0)->get_master();
7786         if (functype == Item_func::GE_FUNC)
7787           op_type= FT_OP_GE;
7788         else if (functype == Item_func::GT_FUNC)
7789           op_type= FT_OP_GT;
7790         cond_func->set_hints_op(op_type, op_value);
7791       }
7792       else if (arg0->const_item() &&
7793                 arg1->type() == Item::FUNC_ITEM &&
7794                 ((Item_func *) arg1)->functype() == Item_func::FT_FUNC &&
7795                ((functype == Item_func::LE_FUNC &&
7796                  (op_value= arg0->val_real()) > 0) ||
7797                 (functype == Item_func::LT_FUNC &&
7798                  (op_value= arg0->val_real()) >=0)))
7799       {
7800         cond_func= ((Item_func_match *) arg1)->get_master();
7801         if (functype == Item_func::LE_FUNC)
7802           op_type= FT_OP_GE;
7803         else if (functype == Item_func::LT_FUNC)
7804           op_type= FT_OP_GT;
7805         cond_func->set_hints_op(op_type, op_value);
7806       }
7807     }
7808   }
7809   else if (cond->type() == Item::COND_ITEM)
7810   {
7811     List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
7812 
7813     if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
7814     {
7815       Item *item;
7816       while ((item=li++))
7817       {
7818         if (add_ft_keys(keyuse_array, stat, item, usable_tables, false))
7819           return TRUE;
7820       }
7821     }
7822   }
7823 
7824   if (!cond_func || cond_func->key == NO_SUCH_KEY ||
7825       !(usable_tables & cond_func->table_ref->map()))
7826     return FALSE;
7827 
7828   cond_func->set_simple_expression(simple_match_expr);
7829 
7830   const Key_use keyuse(cond_func->table_ref,
7831                        cond_func,
7832                        cond_func->key_item()->used_tables(),
7833                        cond_func->key,
7834                        FT_KEYPART,
7835                        0,             // optimize
7836                        0,             // keypart_map
7837                        ~(ha_rows)0,   // ref_table_rows
7838                        false,         // null_rejecting
7839                        NULL,          // cond_guard
7840                        UINT_MAX);     // sj_pred_no
7841   return keyuse_array->push_back(keyuse);
7842 }
7843 
7844 
7845 /**
7846   Compares two keyuse elements.
7847 
7848   @param a first Key_use element
7849   @param b second Key_use element
7850 
7851   Compare Key_use elements so that they are sorted as follows:
7852     -# By table.
7853     -# By key for each table.
7854     -# By keypart for each key.
7855     -# Const values.
7856     -# Ref_or_null.
7857 
7858   @retval  0 If a = b.
7859   @retval <0 If a < b.
7860   @retval >0 If a > b.
7861 */
sort_keyuse(Key_use * a,Key_use * b)7862 static int sort_keyuse(Key_use *a, Key_use *b)
7863 {
7864   int res;
7865   if (a->table_ref->tableno() != b->table_ref->tableno())
7866     return (int) (a->table_ref->tableno() - b->table_ref->tableno());
7867   if (a->key != b->key)
7868     return (int) (a->key - b->key);
7869   if (a->keypart != b->keypart)
7870     return (int) (a->keypart - b->keypart);
7871   // Place const values before other ones
7872   if ((res= MY_TEST((a->used_tables & ~OUTER_REF_TABLE_BIT)) -
7873        MY_TEST((b->used_tables & ~OUTER_REF_TABLE_BIT))))
7874     return res;
7875   /* Place rows that are not 'OPTIMIZE_REF_OR_NULL' first */
7876   return (int) ((a->optimize & KEY_OPTIMIZE_REF_OR_NULL) -
7877 		(b->optimize & KEY_OPTIMIZE_REF_OR_NULL));
7878 }
7879 
7880 
7881 /*
7882   Add to Key_field array all 'ref' access candidates within nested join.
7883 
7884     This function populates Key_field array with entries generated from the
7885     ON condition of the given nested join, and does the same for nested joins
7886     contained within this nested join.
7887 
7888   @param[in]      nested_join_table   Nested join pseudo-table to process
7889   @param[in,out]  end                 End of the key field array
7890   @param[in,out]  and_level           And-level
7891   @param[in,out]  sargables           Array of found sargable candidates
7892 
7893 
7894   @note
7895     We can add accesses to the tables that are direct children of this nested
7896     join (1), and are not inner tables w.r.t their neighbours (2).
7897 
7898     Example for #1 (outer brackets pair denotes nested join this function is
7899     invoked for):
7900     @code
7901      ... LEFT JOIN (t1 LEFT JOIN (t2 ... ) ) ON cond
7902     @endcode
7903     Example for #2:
7904     @code
7905      ... LEFT JOIN (t1 LEFT JOIN t2 ) ON cond
7906     @endcode
7907     In examples 1-2 for condition cond, we can add 'ref' access candidates to
7908     t1 only.
7909     Example #3:
7910     @code
7911      ... LEFT JOIN (t1, t2 LEFT JOIN t3 ON inner_cond) ON cond
7912     @endcode
7913     Here we can add 'ref' access candidates for t1 and t2, but not for t3.
7914 */
7915 
add_key_fields_for_nj(JOIN * join,TABLE_LIST * nested_join_table,Key_field ** end,uint * and_level,SARGABLE_PARAM ** sargables)7916 static void add_key_fields_for_nj(JOIN *join, TABLE_LIST *nested_join_table,
7917                                   Key_field **end, uint *and_level,
7918                                   SARGABLE_PARAM **sargables)
7919 {
7920   List_iterator<TABLE_LIST> li(nested_join_table->nested_join->join_list);
7921   List_iterator<TABLE_LIST> li2(nested_join_table->nested_join->join_list);
7922   bool have_another = FALSE;
7923   table_map tables= 0;
7924   TABLE_LIST *table;
7925   assert(nested_join_table->nested_join);
7926 
7927   while ((table= li++) || (have_another && (li=li2, have_another=FALSE,
7928                                             (table= li++))))
7929   {
7930     if (table->nested_join)
7931     {
7932       if (!table->join_cond_optim())
7933       {
7934         /* It's a semi-join nest. Walk into it as if it wasn't a nest */
7935         have_another= TRUE;
7936         li2= li;
7937         li= List_iterator<TABLE_LIST>(table->nested_join->join_list);
7938       }
7939       else
7940         add_key_fields_for_nj(join, table, end, and_level, sargables);
7941     }
7942     else
7943       if (!table->join_cond_optim())
7944         tables|= table->map();
7945   }
7946   if (nested_join_table->join_cond_optim())
7947     add_key_fields(join, end, and_level, nested_join_table->join_cond_optim(),
7948                    tables, sargables);
7949 }
7950 
7951 
7952 ///  @} (end of group RefOptimizerModule)
7953 
7954 
7955 /**
7956   Check for the presence of AGGFN(DISTINCT a) queries that may be subject
7957   to loose index scan.
7958 
7959 
7960   Check if the query is a subject to AGGFN(DISTINCT) using loose index scan
7961   (QUICK_GROUP_MIN_MAX_SELECT).
7962   Optionally (if out_args is supplied) will push the arguments of
7963   AGGFN(DISTINCT) to the list
7964 
7965   Check for every COUNT(DISTINCT), AVG(DISTINCT) or
7966   SUM(DISTINCT). These can be resolved by Loose Index Scan as long
7967   as all the aggregate distinct functions refer to the same
7968   fields. Thus:
7969 
7970   SELECT AGGFN(DISTINCT a, b), AGGFN(DISTINCT b, a)... => can use LIS
7971   SELECT AGGFN(DISTINCT a),    AGGFN(DISTINCT a)   ... => can use LIS
7972   SELECT AGGFN(DISTINCT a, b), AGGFN(DISTINCT a)   ... => cannot use LIS
7973   SELECT AGGFN(DISTINCT a),    AGGFN(DISTINCT b)   ... => cannot use LIS
7974   etc.
7975 
7976   @param      join       the join to check
7977   @param[out] out_args   Collect the arguments of the aggregate functions
7978                          to a list. We don't worry about duplicates as
7979                          these will be sorted out later in
7980                          get_best_group_min_max.
7981 
7982   @return                does the query qualify for indexed AGGFN(DISTINCT)
7983     @retval   true       it does
7984     @retval   false      AGGFN(DISTINCT) must apply distinct in it.
7985 */
7986 
7987 bool
is_indexed_agg_distinct(JOIN * join,List<Item_field> * out_args)7988 is_indexed_agg_distinct(JOIN *join, List<Item_field> *out_args)
7989 {
7990   Item_sum **sum_item_ptr;
7991   bool result= false;
7992   Field_map first_aggdistinct_fields;
7993 
7994   if (join->primary_tables > 1 ||             /* reference more than 1 table */
7995       join->select_distinct ||                /* or a DISTINCT */
7996       join->select_lex->olap == ROLLUP_TYPE)  /* Check (B3) for ROLLUP */
7997     return false;
7998 
7999   if (join->make_sum_func_list(join->all_fields, join->fields_list, true))
8000     return false;
8001 
8002   for (sum_item_ptr= join->sum_funcs; *sum_item_ptr; sum_item_ptr++)
8003   {
8004     Item_sum *sum_item= *sum_item_ptr;
8005     Field_map cur_aggdistinct_fields;
8006     Item *expr;
8007     /* aggregate is not AGGFN(DISTINCT) or more than 1 argument to it */
8008     switch (sum_item->sum_func())
8009     {
8010       case Item_sum::MIN_FUNC:
8011       case Item_sum::MAX_FUNC:
8012         continue;
8013       case Item_sum::COUNT_DISTINCT_FUNC:
8014         break;
8015       case Item_sum::AVG_DISTINCT_FUNC:
8016       case Item_sum::SUM_DISTINCT_FUNC:
8017         if (sum_item->get_arg_count() == 1)
8018           break;
8019         /* fall through */
8020       default: return false;
8021     }
8022 
8023     for (uint i= 0; i < sum_item->get_arg_count(); i++)
8024     {
8025       expr= sum_item->get_arg(i);
8026       /* The AGGFN(DISTINCT) arg is not an attribute? */
8027       if (expr->real_item()->type() != Item::FIELD_ITEM)
8028         return false;
8029 
8030       Item_field* item= static_cast<Item_field*>(expr->real_item());
8031       if (out_args)
8032         out_args->push_back(item);
8033 
8034       cur_aggdistinct_fields.set_bit(item->field->field_index);
8035       result= true;
8036     }
8037     /*
8038       If there are multiple aggregate functions, make sure that they all
8039       refer to exactly the same set of columns.
8040     */
8041     if (first_aggdistinct_fields.is_clear_all())
8042       first_aggdistinct_fields.merge(cur_aggdistinct_fields);
8043     else if (first_aggdistinct_fields != cur_aggdistinct_fields)
8044       return false;
8045   }
8046 
8047   return result;
8048 }
8049 
8050 
8051 /**
8052   Print keys that were appended to join_tab->const_keys because they
8053   can be used for GROUP BY or DISTINCT to the optimizer trace.
8054 
8055   @param trace     The optimizer trace context we're adding info to
8056   @param join_tab  The table the indexes cover
8057   @param new_keys  The keys that are considered useful because they can
8058                    be used for GROUP BY or DISTINCT
8059   @param cause     Zero-terminated string with reason for adding indexes
8060                    to const_keys
8061 
8062   @see add_group_and_distinct_keys()
8063  */
trace_indexes_added_group_distinct(Opt_trace_context * trace,const JOIN_TAB * join_tab,const key_map new_keys,const char * cause)8064 static void trace_indexes_added_group_distinct(Opt_trace_context *trace,
8065                                                const JOIN_TAB *join_tab,
8066                                                const key_map new_keys,
8067                                                const char* cause)
8068 {
8069 #ifdef OPTIMIZER_TRACE
8070   if (likely(!trace->is_started()))
8071     return;
8072 
8073   KEY *key_info= join_tab->table()->key_info;
8074   key_map existing_keys= join_tab->const_keys;
8075   uint nbrkeys= join_tab->table()->s->keys;
8076 
8077   Opt_trace_object trace_summary(trace, "const_keys_added");
8078   {
8079     Opt_trace_array trace_key(trace,"keys");
8080     for (uint j= 0 ; j < nbrkeys ; j++)
8081       if (new_keys.is_set(j) && !existing_keys.is_set(j))
8082         trace_key.add_utf8(key_info[j].name);
8083   }
8084   trace_summary.add_alnum("cause", cause);
8085 #endif
8086 }
8087 
8088 
8089 /**
8090   Discover the indexes that might be used for GROUP BY or DISTINCT queries.
8091 
8092   If the query has a GROUP BY clause, find all indexes that contain
8093   all GROUP BY fields, and add those indexes to join_tab->const_keys
8094   and join_tab->keys.
8095 
8096   If the query has a DISTINCT clause, find all indexes that contain
8097   all SELECT fields, and add those indexes to join_tab->const_keys and
8098   join_tab->keys. This allows later on such queries to be processed by
8099   a QUICK_GROUP_MIN_MAX_SELECT.
8100 
8101   Note that indexes that are not usable for resolving GROUP
8102   BY/DISTINCT may also be added in some corner cases. For example, an
8103   index covering 'a' and 'b' is not usable for the following query but
8104   is still added: "SELECT DISTINCT a+b FROM t1". This is not a big
8105   issue because a) although the optimizer will consider using the
8106   index, it will not chose it (so minor calculation cost added but not
8107   wrong result) and b) it applies only to corner cases.
8108 
8109   @param join
8110   @param join_tab
8111 
8112   @return
8113     None
8114 */
8115 
8116 static void
add_group_and_distinct_keys(JOIN * join,JOIN_TAB * join_tab)8117 add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab)
8118 {
8119   assert(join_tab->const_keys.is_subset(join_tab->keys()));
8120 
8121   List<Item_field> indexed_fields;
8122   List_iterator<Item_field> indexed_fields_it(indexed_fields);
8123   ORDER      *cur_group;
8124   Item_field *cur_item;
8125   const char *cause;
8126 
8127   if (join->group_list)
8128   { /* Collect all query fields referenced in the GROUP clause. */
8129     for (cur_group= join->group_list; cur_group; cur_group= cur_group->next)
8130       (*cur_group->item)->walk(&Item::collect_item_field_processor,
8131                                Item::WALK_POSTFIX,
8132                                (uchar*) &indexed_fields);
8133     cause= "group_by";
8134   }
8135   else if (join->select_distinct)
8136   { /* Collect all query fields referenced in the SELECT clause. */
8137     List<Item> &select_items= join->fields_list;
8138     List_iterator<Item> select_items_it(select_items);
8139     Item *item;
8140     while ((item= select_items_it++))
8141       item->walk(&Item::collect_item_field_processor,
8142                  Item::WALK_POSTFIX,
8143                  (uchar*) &indexed_fields);
8144     cause= "distinct";
8145   }
8146   else if (join->tmp_table_param.sum_func_count &&
8147            is_indexed_agg_distinct(join, &indexed_fields))
8148   {
8149     /*
8150       SELECT list with AGGFN(distinct col). The query qualifies for
8151       loose index scan, and is_indexed_agg_distinct() has already
8152       collected all referenced fields into indexed_fields.
8153     */
8154     join->sort_and_group= 1;
8155     cause= "indexed_distinct_aggregate";
8156   }
8157   else
8158     return;
8159 
8160   if (indexed_fields.elements == 0)
8161     return;
8162 
8163   key_map possible_keys;
8164   possible_keys.set_all();
8165 
8166   /* Intersect the keys of all group fields. */
8167   while ((cur_item= indexed_fields_it++))
8168   {
8169     if (cur_item->used_tables() != join_tab->table_ref->map())
8170     {
8171       /*
8172         Doing GROUP BY or DISTINCT on a field in another table so no
8173         index in this table is usable
8174       */
8175       return;
8176     }
8177     else
8178       possible_keys.intersect(cur_item->field->part_of_key);
8179   }
8180 
8181   /*
8182     At this point, possible_keys has key bits set only for usable
8183     indexes because indexed_fields is non-empty and if any of the
8184     fields belong to a different table the function would exit in the
8185     loop above.
8186   */
8187 
8188   if (!possible_keys.is_clear_all() &&
8189       !possible_keys.is_subset(join_tab->const_keys))
8190   {
8191     trace_indexes_added_group_distinct(&join->thd->opt_trace, join_tab,
8192                                        possible_keys, cause);
8193     join_tab->const_keys.merge(possible_keys);
8194     join_tab->keys().merge(possible_keys);
8195   }
8196 
8197   assert(join_tab->const_keys.is_subset(join_tab->keys()));
8198 }
8199 
8200 /**
8201   Update keyuse array with all possible keys we can use to fetch rows.
8202 
8203   @param       thd
8204   @param[out]  keyuse         Put here ordered array of Key_use structures
8205   @param       join_tab       Array in table number order
8206   @param       tables         Number of tables in join
8207   @param       cond           WHERE condition (note that the function analyzes
8208                               join_tab[i]->join_cond() too)
8209   @param       normal_tables  Tables not inner w.r.t some outer join (ones
8210                               for which we can make ref access based the WHERE
8211                               clause)
8212   @param       select_lex     current SELECT
8213   @param[out]  sargables      Array of found sargable candidates
8214 
8215    @retval
8216      0  OK
8217    @retval
8218      1  Out of memory.
8219 */
8220 
8221 static bool
update_ref_and_keys(THD * thd,Key_use_array * keyuse,JOIN_TAB * join_tab,uint tables,Item * cond,COND_EQUAL * cond_equal,table_map normal_tables,SELECT_LEX * select_lex,SARGABLE_PARAM ** sargables)8222 update_ref_and_keys(THD *thd, Key_use_array *keyuse,JOIN_TAB *join_tab,
8223                     uint tables, Item *cond, COND_EQUAL *cond_equal,
8224                     table_map normal_tables, SELECT_LEX *select_lex,
8225                     SARGABLE_PARAM **sargables)
8226 {
8227   uint	and_level,i,found_eq_constant;
8228   Key_field *key_fields, *end, *field;
8229   size_t sz;
8230   uint m= max(select_lex->max_equal_elems, 1U);
8231   JOIN *const join= select_lex->join;
8232   /*
8233     We use the same piece of memory to store both  Key_field
8234     and SARGABLE_PARAM structure.
8235     Key_field values are placed at the beginning this memory
8236     while  SARGABLE_PARAM values are put at the end.
8237     All predicates that are used to fill arrays of Key_field
8238     and SARGABLE_PARAM structures have at most 2 arguments
8239     except BETWEEN predicates that have 3 arguments and
8240     IN predicates.
8241     This any predicate if it's not BETWEEN/IN can be used
8242     directly to fill at most 2 array elements, either of Key_field
8243     or SARGABLE_PARAM type. For a BETWEEN predicate 3 elements
8244     can be filled as this predicate is considered as
8245     saragable with respect to each of its argument.
8246     An IN predicate can require at most 1 element as currently
8247     it is considered as sargable only for its first argument.
8248     Multiple equality can add  elements that are filled after
8249     substitution of field arguments by equal fields. There
8250     can be not more than select_lex->max_equal_elems such
8251     substitutions.
8252   */
8253   sz= max(sizeof(Key_field), sizeof(SARGABLE_PARAM)) *
8254     (((select_lex->cond_count + 1) * 2 +
8255       select_lex->between_count) * m + 1);
8256   if (!(key_fields=(Key_field*)	thd->alloc(sz)))
8257     return TRUE; /* purecov: inspected */
8258   and_level= 0;
8259   field= end= key_fields;
8260   *sargables= (SARGABLE_PARAM *) key_fields +
8261     (sz - sizeof((*sargables)[0].field))/sizeof(SARGABLE_PARAM);
8262   /* set a barrier for the array of SARGABLE_PARAM */
8263   (*sargables)[0].field= 0;
8264 
8265   if (cond)
8266   {
8267     add_key_fields(join, &end, &and_level, cond, normal_tables, sargables);
8268     for (Key_field *fld= field; fld != end ; fld++)
8269     {
8270       /* Mark that we can optimize LEFT JOIN */
8271       if (fld->val->type() == Item::NULL_ITEM &&
8272           !fld->item_field->field->real_maybe_null())
8273       {
8274         /*
8275           Example:
8276           SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.a WHERE t2.a IS NULL;
8277           this just wants rows of t1 where t1.a does not exist in t2.
8278         */
8279         fld->item_field->field->table->reginfo.not_exists_optimize= true;
8280       }
8281     }
8282   }
8283 
8284   for (i=0 ; i < tables ; i++)
8285   {
8286     /*
8287       Block the creation of keys for inner tables of outer joins.
8288       Here only the outer joins that can not be converted to
8289       inner joins are left and all nests that can be eliminated
8290       are flattened.
8291       In the future when we introduce conditional accesses
8292       for inner tables in outer joins these keys will be taken
8293       into account as well.
8294     */
8295     if (join_tab[i].join_cond())
8296       add_key_fields(join, &end, &and_level,
8297                      join_tab[i].join_cond(),
8298                      join_tab[i].table_ref->map(), sargables);
8299   }
8300 
8301   /* Process ON conditions for the nested joins */
8302   {
8303     List_iterator<TABLE_LIST> li(select_lex->top_join_list);
8304     TABLE_LIST *tl;
8305     while ((tl= li++))
8306     {
8307       if (tl->nested_join)
8308         add_key_fields_for_nj(join, tl, &end, &and_level, sargables);
8309     }
8310   }
8311 
8312   /* Generate keys descriptions for derived tables */
8313   if (select_lex->materialized_derived_table_count)
8314   {
8315     if (join->generate_derived_keys())
8316       return true;
8317   }
8318   /* fill keyuse with found key parts */
8319   for ( ; field != end ; field++)
8320   {
8321     if (add_key_part(keyuse,field))
8322       return true;
8323   }
8324 
8325   if (select_lex->ftfunc_list->elements)
8326   {
8327     if (add_ft_keys(keyuse, join_tab, cond, normal_tables, true))
8328       return true;
8329   }
8330 
8331   /*
8332     Sort the array of possible keys and remove the following key parts:
8333     - ref if there is a keypart which is a ref and a const.
8334       (e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d,
8335       then we skip the key part corresponding to b=t2.d)
8336     - keyparts without previous keyparts
8337       (e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is
8338       used in the query, we drop the partial key parts from consideration).
8339     Special treatment for ft-keys.
8340   */
8341   if (!keyuse->empty())
8342   {
8343     Key_use *save_pos, *use;
8344 
8345     my_qsort(keyuse->begin(), keyuse->size(), keyuse->element_size(),
8346              reinterpret_cast<qsort_cmp>(sort_keyuse));
8347 
8348     const Key_use key_end(NULL, NULL, 0, 0, 0, 0, 0, 0, false, NULL, 0);
8349     if (keyuse->push_back(key_end)) // added for easy testing
8350       return TRUE;
8351 
8352     use= save_pos= keyuse->begin();
8353     const Key_use *prev= &key_end;
8354     found_eq_constant=0;
8355     for (i=0 ; i < keyuse->size()-1 ; i++,use++)
8356     {
8357       TABLE *const table= use->table_ref->table;
8358       if (!use->used_tables && use->optimize != KEY_OPTIMIZE_REF_OR_NULL)
8359         table->const_key_parts[use->key]|= use->keypart_map;
8360       if (use->keypart != FT_KEYPART)
8361       {
8362         if (use->key == prev->key && use->table_ref == prev->table_ref)
8363         {
8364           if (prev->keypart+1 < use->keypart ||
8365               (prev->keypart == use->keypart && found_eq_constant))
8366             continue; /* remove */
8367         }
8368         else if (use->keypart != 0) // First found must be 0
8369           continue;
8370       }
8371 
8372 #if defined(__GNUC__) && !MY_GNUC_PREREQ(4,4)
8373       /*
8374         Old gcc used a memcpy(), which is undefined if save_pos==use:
8375         http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19410
8376         http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39480
8377       */
8378       if (save_pos != use)
8379 #endif
8380         *save_pos= *use;
8381       prev=use;
8382       found_eq_constant= !use->used_tables;
8383       /* Save ptr to first use */
8384       if (!table->reginfo.join_tab->keyuse())
8385         table->reginfo.join_tab->set_keyuse(save_pos);
8386       table->reginfo.join_tab->checked_keys.set_bit(use->key);
8387       save_pos++;
8388     }
8389     i= (uint) (save_pos - keyuse->begin());
8390     keyuse->at(i) = key_end;
8391     keyuse->chop(i);
8392   }
8393   print_keyuse_array(&thd->opt_trace, keyuse);
8394 
8395   return false;
8396 }
8397 
8398 
8399 /**
8400   Create a keyuse array for a table with a primary key.
8401   To be used when creating a materialized temporary table.
8402 
8403   @param thd         THD pointer, for memory allocation
8404   @param table       Table object representing table
8405   @param keyparts    Number of key parts in the primary key
8406   @param outer_exprs List of items used for key lookup
8407 
8408   @return Pointer to created keyuse array, or NULL if error
8409 */
create_keyuse_for_table(THD * thd,TABLE * table,uint keyparts,Item_field ** fields,List<Item> outer_exprs)8410 Key_use_array *create_keyuse_for_table(THD *thd, TABLE *table, uint keyparts,
8411                                        Item_field **fields,
8412                                        List<Item> outer_exprs)
8413 {
8414   void *mem= thd->alloc(sizeof(Key_use_array));
8415   if (!mem)
8416     return NULL;
8417   Key_use_array *keyuses= new (mem) Key_use_array(thd->mem_root);
8418 
8419   List_iterator<Item> outer_expr(outer_exprs);
8420 
8421   for (uint keypartno= 0; keypartno < keyparts; keypartno++)
8422   {
8423     Item *const item= outer_expr++;
8424     Key_field key_field(fields[keypartno], item, 0, 0, true,
8425                         // null_rejecting must be true for field items only,
8426                         // add_not_null_conds() is incapable of handling
8427                         // other item types.
8428                         (item->type() == Item::FIELD_ITEM),
8429                         NULL, UINT_MAX);
8430     if (add_key_part(keyuses, &key_field))
8431       return NULL;
8432   }
8433   const Key_use key_end(NULL, NULL, 0, 0, 0, 0, 0, 0, false, NULL, 0);
8434   if (keyuses->push_back(key_end)) // added for easy testing
8435     return NULL;
8436 
8437   return keyuses;
8438 }
8439 
8440 
8441 /**
8442   Move const tables first in the position array.
8443 
8444   Increment the number of const tables and set same basic properties for the
8445   const table.
8446   A const table looked up by a key has type JT_CONST.
8447   A const table with a single row has type JT_SYSTEM.
8448 
8449   @param tab    Table that is designated as a const table
8450   @param key    The key definition to use for this table (NULL if table scan)
8451 */
8452 
mark_const_table(JOIN_TAB * tab,Key_use * key)8453 void JOIN::mark_const_table(JOIN_TAB *tab, Key_use *key)
8454 {
8455   POSITION *const position= positions + const_tables;
8456   position->table= tab;
8457   position->key= key;
8458   position->rows_fetched= 1.0;               // This is a const table
8459   position->filter_effect= 1.0;
8460   position->prefix_rowcount= 1.0;
8461   position->read_cost= 0.0;
8462   position->ref_depend_map= 0;
8463   position->loosescan_key= MAX_KEY;    // Not a LooseScan
8464   position->sj_strategy= SJ_OPT_NONE;
8465   positions->use_join_buffer= false;
8466 
8467   // Move the const table as far down as possible in best_ref
8468   JOIN_TAB **pos= best_ref + const_tables + 1;
8469   for (JOIN_TAB *next= best_ref[const_tables]; next != tab; pos++)
8470   {
8471     JOIN_TAB *const tmp= pos[0];
8472     pos[0]= next;
8473     next= tmp;
8474   }
8475   best_ref[const_tables]= tab;
8476 
8477   tab->set_type(key ? JT_CONST : JT_SYSTEM);
8478 
8479   const_table_map|= tab->table_ref->map();
8480 
8481   const_tables++;
8482 }
8483 
8484 
make_outerjoin_info()8485 void JOIN::make_outerjoin_info()
8486 {
8487   DBUG_ENTER("JOIN::make_outerjoin_info");
8488 
8489   assert(select_lex->outer_join);
8490   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
8491 
8492   select_lex->reset_nj_counters();
8493 
8494   for (uint i= const_tables; i < tables; ++i)
8495   {
8496     JOIN_TAB *const tab= best_ref[i];
8497     TABLE *const table= tab->table();
8498     if (!table)
8499       continue;
8500 
8501     TABLE_LIST *const tbl= tab->table_ref;
8502 
8503     if (tbl->outer_join)
8504     {
8505       /*
8506         Table tab is the only one inner table for outer join.
8507         (Like table t4 for the table reference t3 LEFT JOIN t4 ON t3.a=t4.a
8508         is in the query above.)
8509       */
8510       tab->set_last_inner(i);
8511       tab->set_first_inner(i);
8512       tab->init_join_cond_ref(tbl);
8513       tab->cond_equal= tbl->cond_equal;
8514       /*
8515         If this outer join nest is embedded in another join nest,
8516         link the join-tabs:
8517       */
8518       TABLE_LIST *const outer_join_nest= tbl->outer_join_nest();
8519       if (outer_join_nest)
8520         tab->set_first_upper(outer_join_nest->nested_join->first_nested);
8521     }
8522     for (TABLE_LIST *embedding= tbl->embedding;
8523          embedding;
8524          embedding= embedding->embedding)
8525     {
8526       // Ignore join nests that are not outer join nests:
8527       if (!embedding->join_cond_optim())
8528         continue;
8529       NESTED_JOIN *const nested_join= embedding->nested_join;
8530       if (!nested_join->nj_counter)
8531       {
8532         /*
8533           Table tab is the first inner table for nested_join.
8534           Save reference to it in the nested join structure.
8535         */
8536         nested_join->first_nested= i;
8537         tab->init_join_cond_ref(embedding);
8538         tab->cond_equal= tbl->cond_equal;
8539 
8540         TABLE_LIST *const outer_join_nest= embedding->outer_join_nest();
8541         if (outer_join_nest)
8542           tab->set_first_upper(outer_join_nest->nested_join->first_nested);
8543       }
8544       if (tab->first_inner() == NO_PLAN_IDX)
8545         tab->set_first_inner(nested_join->first_nested);
8546       if (++nested_join->nj_counter < nested_join->nj_total)
8547         break;
8548       // Table tab is the last inner table for nested join.
8549       best_ref[nested_join->first_nested]->set_last_inner(i);
8550     }
8551   }
8552   DBUG_VOID_RETURN;
8553 }
8554 
8555 /**
8556   Build a condition guarded by match variables for embedded outer joins.
8557   When generating a condition for a table as part of an outer join condition
8558   or the WHERE condition, the table in question may also be part of an
8559   embedded outer join. In such cases, the condition must be guarded by
8560   the match variable for this embedded outer join. Such embedded outer joins
8561   may also be recursively embedded in other joins.
8562 
8563   The function recursively adds guards for a condition ascending from tab
8564   to root_tab, which is the first inner table of an outer join,
8565   or NULL if the condition being handled is the WHERE clause.
8566 
8567   @param idx       index of the first inner table for the inner-most outer join
8568   @param cond      the predicate to be guarded (must be set)
8569   @param root_idx  index of the inner table to stop at
8570                    (is NO_PLAN_IDX if this is the WHERE clause)
8571 
8572   @return
8573     -  pointer to the guarded predicate, if success
8574     -  NULL if error
8575 */
8576 
8577 static Item*
add_found_match_trig_cond(JOIN * join,plan_idx idx,Item * cond,plan_idx root_idx)8578 add_found_match_trig_cond(JOIN *join, plan_idx idx, Item *cond,
8579                           plan_idx root_idx)
8580 {
8581   ASSERT_BEST_REF_IN_JOIN_ORDER(join);
8582   assert(cond);
8583 
8584   for ( ; idx != root_idx; idx= join->best_ref[idx]->first_upper())
8585   {
8586     if (!(cond= new Item_func_trig_cond(cond, NULL, join, idx,
8587                                         Item_func_trig_cond::FOUND_MATCH)))
8588       return NULL;
8589 
8590     cond->quick_fix_field();
8591     cond->update_used_tables();
8592   }
8593 
8594   return cond;
8595 }
8596 
8597 
8598 /**
8599   Attach outer join conditions to generated table conditions in an optimal way.
8600 
8601   @param last_tab - Last table that has been added to the current plan.
8602                     Pre-condition: If this is the last inner table of an outer
8603                     join operation, a join condition is attached to the first
8604                     inner table of that outer join operation.
8605 
8606   @return false if success, true if error.
8607 
8608   Outer join conditions are attached to individual tables, but we can analyze
8609   those conditions only when reaching the last inner table of an outer join
8610   operation. Notice also that a table can be last within several outer join
8611   nests, hence the outer for() loop of this function.
8612 
8613   Example:
8614     SELECT * FROM t1 LEFT JOIN (t2 LEFT JOIN t3 ON t2.a=t3.a) ON t1.a=t2.a
8615 
8616     Table t3 is last both in the join nest (t2 - t3) and in (t1 - (t2 - t3))
8617     Thus, join conditions for both join nests will be evaluated when reaching
8618     this table.
8619 
8620   For each outer join operation processed, the join condition is split
8621   optimally over the inner tables of the outer join. The split-out conditions
8622   are later referred to as table conditions (but note that several table
8623   conditions stemming from different join operations may be combined into
8624   a composite table condition).
8625 
8626   Example:
8627     Consider the above query once more.
8628     The predicate t1.a=t2.a can be evaluated when rows from t1 and t2 are ready,
8629     ie at table t2. The predicate t2.a=t3.a can be evaluated at table t3.
8630 
8631   Each non-constant split-out table condition is guarded by a match variable
8632   that enables it only when a matching row is found for all the embedded
8633   outer join operations.
8634 
8635   Each split-out table condition is guarded by a variable that turns the
8636   condition off just before a null-complemented row for the outer join
8637   operation is formed. Thus, the join condition will not be checked for
8638   the null-complemented row.
8639 */
8640 
attach_join_conditions(plan_idx last_tab)8641 bool JOIN::attach_join_conditions(plan_idx last_tab)
8642 {
8643   DBUG_ENTER("JOIN::attach_join_conditions");
8644   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
8645 
8646   for (plan_idx first_inner= best_ref[last_tab]->first_inner();
8647        first_inner != NO_PLAN_IDX &&
8648          best_ref[first_inner]->last_inner() == last_tab;
8649        first_inner= best_ref[first_inner]->first_upper())
8650   {
8651     /*
8652       Table last_tab is the last inner table of an outer join, locate
8653       the corresponding join condition from the first inner table of the
8654       same outer join:
8655     */
8656     Item *const join_cond= best_ref[first_inner]->join_cond();
8657     assert(join_cond);
8658     /*
8659       Add the constant part of the join condition to the first inner table
8660       of the outer join.
8661     */
8662     Item *cond= make_cond_for_table(join_cond, const_table_map,
8663                                     (table_map) 0, false);
8664     if (cond)
8665     {
8666       cond= new Item_func_trig_cond(cond, NULL, this, first_inner,
8667                                     Item_func_trig_cond::IS_NOT_NULL_COMPL);
8668       if (!cond)
8669         DBUG_RETURN(true);
8670       if (cond->fix_fields(thd, NULL))
8671         DBUG_RETURN(true);
8672 
8673       if (best_ref[first_inner]->and_with_condition(cond))
8674         DBUG_RETURN(true);
8675     }
8676     /*
8677       Split the non-constant part of the join condition into parts that
8678       can be attached to the inner tables of the outer join.
8679     */
8680     for (plan_idx i= first_inner; i <= last_tab; ++i)
8681     {
8682       table_map prefix_tables= best_ref[i]->prefix_tables();
8683       table_map added_tables= best_ref[i]->added_tables();
8684 
8685       /*
8686         When handling the first inner table of an outer join, we may also
8687         reference all tables ahead of this table:
8688       */
8689       if (i == first_inner)
8690         added_tables= prefix_tables;
8691       /*
8692         We need RAND_TABLE_BIT on the last inner table, in case there is a
8693         non-deterministic function in the join condition.
8694         (RAND_TABLE_BIT is set for the last table of the join plan,
8695          but this is not sufficient for join conditions, which may have a
8696          last inner table that is ahead of the last table of the join plan).
8697       */
8698       if (i == last_tab)
8699       {
8700         prefix_tables|= RAND_TABLE_BIT;
8701         added_tables|= RAND_TABLE_BIT;
8702       }
8703       cond= make_cond_for_table(join_cond, prefix_tables, added_tables, false);
8704       if (cond == NULL)
8705         continue;
8706       /*
8707         If the table is part of an outer join that is embedded in the
8708         outer join currently being processed, wrap the condition in
8709         triggered conditions for match variables of such embedded outer joins.
8710       */
8711       if (!(cond= add_found_match_trig_cond(this, best_ref[i]->first_inner(),
8712                                             cond, first_inner)))
8713         DBUG_RETURN(true);
8714 
8715       // Add the guard turning the predicate off for the null-complemented row.
8716       cond= new Item_func_trig_cond(cond, NULL, this, first_inner,
8717                                     Item_func_trig_cond::IS_NOT_NULL_COMPL);
8718       if (!cond)
8719         DBUG_RETURN(true);
8720       if (cond->fix_fields(thd, NULL))
8721         DBUG_RETURN(true);
8722 
8723       // Add the generated condition to the existing table condition
8724       if (best_ref[i]->and_with_condition(cond))
8725         DBUG_RETURN(true);
8726     }
8727   }
8728 
8729   DBUG_RETURN(false);
8730 }
8731 
8732 
8733 /*****************************************************************************
8734   Remove calculation with tables that aren't yet read. Remove also tests
  against fields that are read through key where the table is not an
  outer join table.
8737   We can't remove tests that are made against columns which are stored
8738   in sorted order.
8739 *****************************************************************************/
8740 
8741 static Item *
part_of_refkey(TABLE * table,TABLE_REF * ref,Field * field)8742 part_of_refkey(TABLE *table, TABLE_REF *ref, Field *field)
8743 {
8744   uint ref_parts= ref->key_parts;
8745   if (ref_parts)
8746   {
8747     if (ref->has_guarded_conds())
8748       return NULL;
8749 
8750     const KEY_PART_INFO *key_part= table->key_info[ref->key].key_part;
8751 
8752     for (uint part=0 ; part < ref_parts ; part++,key_part++)
8753       if (field->eq(key_part->field) &&
8754 	  !(key_part->key_part_flag & HA_PART_KEY_SEG))
8755 	return ref->items[part];
8756   }
8757   return NULL;
8758 }
8759 
8760 
8761 /**
8762   @return
8763     1 if right_item is used removable reference key on left_item
8764 
8765   @note see comments in make_cond_for_table_from_pred() about careful
8766   usage/modifications of test_if_ref().
8767 */
8768 
test_if_ref(Item * root_cond,Item_field * left_item,Item * right_item)8769 static bool test_if_ref(Item *root_cond,
8770                         Item_field *left_item,Item *right_item)
8771 {
8772   if (left_item->depended_from)
8773     return false; // don't even read join_tab of inner subquery!
8774   Field *field=left_item->field;
8775   JOIN_TAB *join_tab= field->table->reginfo.join_tab;
8776   if (join_tab)
8777     ASSERT_BEST_REF_IN_JOIN_ORDER(join_tab->join());
8778  // No need to change const test
8779   if (!field->table->const_table && join_tab &&
8780       (join_tab->first_inner() == NO_PLAN_IDX ||
8781        join_tab->join()->best_ref[join_tab->first_inner()]->join_cond() == root_cond) &&
8782       /* "ref_or_null" implements "x=y or x is null", not "x=y" */
8783       (join_tab->type() != JT_REF_OR_NULL))
8784   {
8785     Item *ref_item= part_of_refkey(field->table, &join_tab->ref(), field);
8786     if (ref_item && ref_item->eq(right_item,1))
8787     {
8788       right_item= right_item->real_item();
8789       if (right_item->type() == Item::FIELD_ITEM)
8790 	return (field->eq_def(((Item_field *) right_item)->field));
8791       /* remove equalities injected by IN->EXISTS transformation */
8792       else if (right_item->type() == Item::CACHE_ITEM)
8793         return ((Item_cache *)right_item)->eq_def (field);
8794       if (right_item->const_item() && !(right_item->is_null()))
8795       {
8796         /*
8797           We can remove all fields except:
8798           1. String data types:
8799            - For BINARY/VARBINARY fields with equality against a
8800              string: Ref access can return more rows than match the
8801              string. The reason seems to be that the string constant
8802              is not "padded" to the full length of the field when
8803              setting up ref access. @todo Change how ref access for
8804              BINARY/VARBINARY fields are done so that only qualifying
8805              rows are returned from the storage engine.
8806           2. Float data type: Comparison of float can differ
8807            - When we search "WHERE field=value" using an index,
8808              the "value" side is converted from double to float by
8809              Field_float::store(), then two floats are compared.
8810            - When we search "WHERE field=value" without indexes,
8811              the "field" side is converted from float to double by
8812              Field_float::val_real(), then two doubles are compared.
8813           Note about string data types: All currently existing
8814           collations have "PAD SPACE" style. If we introduce "NO PAD"
8815           collations this function must return false for such
8816           collations, because trailing space compression for indexes
8817           makes the table value and the index value not equal to each
8818           other in "NO PAD" collations. As index lookup strips
8819           trailing spaces, it can return false candidates. Further
8820           comparison of the actual table values is required.
8821         */
8822         if (!((field->type() == MYSQL_TYPE_STRING ||                       // 1
8823                field->type() == MYSQL_TYPE_VARCHAR) && field->binary()) &&
8824             !(field->type() == MYSQL_TYPE_FLOAT && field->decimals() > 0)) // 2
8825         {
8826           return !right_item->save_in_field_no_warnings(field, true);
8827         }
8828       }
8829     }
8830   }
8831   return 0;					// keep test
8832 }
8833 
8834 
8835 /*
8836   Remove the predicates pushed down into the subquery
8837 
8838   DESCRIPTION
8839     Given that this join will be executed using (unique|index)_subquery,
8840     without "checking NULL", remove the predicates that were pushed down
8841     into the subquery.
8842 
8843     If the subquery compares scalar values, we can remove the condition that
8844     was wrapped into trig_cond (it will be checked when needed by the subquery
8845     engine)
8846 
8847     If the subquery compares row values, we need to keep the wrapped
8848     equalities in the WHERE clause: when the left (outer) tuple has both NULL
8849     and non-NULL values, we'll do a full table scan and will rely on the
8850     equalities corresponding to non-NULL parts of left tuple to filter out
8851     non-matching records.
8852 
8853     If '*where' is a triggered condition, or contains 'OR x IS NULL', or
8854     contains a condition coming from the original subquery's WHERE clause, or
8855     if there are more than one outer expressions, then WHERE is not of the
8856     simple form:
8857       outer_expr = inner_expr
8858     and thus this function does nothing.
8859 
8860     If the index is on prefix (=> test_if_ref() is false), then the equality
8861     is needed as post-filter, so this function does nothing.
8862 
8863     TODO: We can remove the equalities that will be guaranteed to be true by the
8864     fact that subquery engine will be using index lookup. This must be done only
8865     for cases where there are no conversion errors of significance, e.g. 257
8866     that is searched in a byte. But this requires homogenization of the return
8867     codes of all Field*::store() methods.
8868 */
remove_subq_pushed_predicates()8869 void JOIN::remove_subq_pushed_predicates()
8870 {
8871   if (where_cond->type() != Item::FUNC_ITEM)
8872     return;
8873   Item_func *const func= static_cast<Item_func *>(where_cond);
8874   if (func->functype() == Item_func::EQ_FUNC &&
8875       func->arguments()[0]->type() == Item::REF_ITEM &&
8876       func->arguments()[1]->type() == Item::FIELD_ITEM &&
8877       test_if_ref(func,
8878                   static_cast<Item_field *>(func->arguments()[1]),
8879                   func->arguments()[0]))
8880   {
8881     where_cond= NULL;
8882     return;
8883   }
8884 }
8885 
8886 
8887 /**
8888   @brief
8889   Add keys to derived tables'/views' result tables in a list
8890 
  @details
  This function takes no arguments: it generates keys for all derived
  tables/views of the select_lex that this join corresponds to
  (JOIN::select_lex), with the help of the TABLE_LIST::generate_keys
  function.
8897 
8898   @return FALSE all keys were successfully added.
8899   @return TRUE OOM error
8900 */
8901 
generate_derived_keys()8902 bool JOIN::generate_derived_keys()
8903 {
8904   assert(select_lex->materialized_derived_table_count);
8905 
8906   for (TABLE_LIST *table= select_lex->leaf_tables;
8907        table;
8908        table= table->next_leaf)
8909   {
8910     table->derived_keys_ready= TRUE;
8911     /* Process tables that aren't materialized yet. */
8912     if (table->uses_materialization() && !table->table->is_created() &&
8913         table->generate_keys())
8914       return TRUE;
8915   }
8916   return FALSE;
8917 }
8918 
8919 
8920 /**
8921   @brief
8922   Drop unused keys for each materialized derived table/view
8923 
8924   @details
8925   For each materialized derived table/view, call TABLE::use_index to save one
8926   index chosen by the optimizer and ignore others. If no key is chosen, then all
8927   keys will be ignored.
8928 */
8929 
drop_unused_derived_keys()8930 void JOIN::drop_unused_derived_keys()
8931 {
8932   assert(select_lex->materialized_derived_table_count);
8933   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
8934 
8935   for (uint i= 0 ; i < tables ; i++)
8936   {
8937     JOIN_TAB *tab= best_ref[i];
8938     TABLE *table= tab->table();
8939     /*
8940      Save chosen key description if:
8941      1) it's a materialized derived table
8942      2) it's not yet instantiated
8943      3) some keys are defined for it
8944     */
8945     if (table &&
8946         tab->table_ref->uses_materialization() &&               // (1)
8947         !table->is_created() &&                                 // (2)
8948         table->max_keys > 0)                                    // (3)
8949     {
8950       Key_use *keyuse= tab->position()->key;
8951 
8952       table->use_index(keyuse ? keyuse->key : -1);
8953 
8954       const bool key_is_const= keyuse && tab->const_keys.is_set(keyuse->key);
8955       tab->const_keys.clear_all();
8956       tab->keys().clear_all();
8957 
8958       if (!keyuse)
8959         continue;
8960 
8961       /*
8962         Update the selected "keyuse" to point to key number 0.
8963         Notice that unused keyuse entries still point to the deleted
8964         candidate keys. tab->keys (and tab->const_keys if the chosen key
8965         is constant) should reference key object no. 0 as well.
8966       */
8967       tab->keys().set_bit(0);
8968       if (key_is_const)
8969         tab->const_keys.set_bit(0);
8970 
8971       const uint oldkey= keyuse->key;
8972       for (; keyuse->table_ref == tab->table_ref && keyuse->key == oldkey;
8973            keyuse++)
8974         keyuse->key= 0;
8975     }
8976   }
8977 }
8978 
8979 
8980 /**
8981   Cache constant expressions in WHERE, HAVING, ON conditions.
8982 
8983   @return False if success, True if error
8984 
8985   @note This function is run after conditions have been pushed down to
8986         individual tables, so transformation is applied to JOIN_TAB::condition
8987         and not to the WHERE condition.
8988 */
8989 
cache_const_exprs()8990 bool JOIN::cache_const_exprs()
8991 {
8992   /* No need in cache if all tables are constant. */
8993   assert(!plan_is_const());
8994   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
8995 
8996   for (uint i= const_tables; i < tables; i++)
8997   {
8998     Item *condition= best_ref[i]->condition();
8999     if (condition == NULL)
9000       continue;
9001     Item *cache_item= NULL;
9002     Item **analyzer_arg= &cache_item;
9003     condition=
9004       condition->compile(&Item::cache_const_expr_analyzer,
9005                          (uchar **)&analyzer_arg,
9006                          &Item::cache_const_expr_transformer,
9007                          (uchar *)&cache_item);
9008     if (condition == NULL)
9009       return true;
9010     best_ref[i]->set_condition(condition);
9011   }
9012   if (having_cond)
9013   {
9014     Item *cache_item= NULL;
9015     Item **analyzer_arg= &cache_item;
9016     having_cond= having_cond->compile(&Item::cache_const_expr_analyzer,
9017                                       (uchar **)&analyzer_arg,
9018                                       &Item::cache_const_expr_transformer,
9019                                       (uchar *)&cache_item);
9020     if (having_cond == NULL)
9021       return true;
9022   }
9023   return false;
9024 }
9025 
9026 
9027 /**
9028   Extract a condition that can be checked after reading given table
9029 
9030   @param cond       Condition to analyze
9031   @param tables     Tables for which "current field values" are available
9032   @param used_table Table(s) that we are extracting the condition for (may
9033                     also include PSEUDO_TABLE_BITS, and may be zero)
9034   @param exclude_expensive_cond  Do not push expensive conditions
9035 
9036   @retval <>NULL Generated condition
9037   @retval = NULL Already checked, OR error
9038 
9039   @details
9040     Extract the condition that can be checked after reading the table(s)
9041     specified in @c used_table, given that current-field values for tables
9042     specified in @c tables bitmap are available.
9043     If @c used_table is 0, extract conditions for all tables in @c tables.
9044 
9045     This function can be used to extract conditions relevant for a table
9046     in a join order. Together with its caller, it will ensure that all
9047     conditions are attached to the first table in the join order where all
9048     necessary fields are available, and it will also ensure that a given
9049     condition is attached to only one table.
9050     To accomplish this, first initialize @c tables to the empty
9051     set. Then, loop over all tables in the join order, set @c used_table to
9052     the bit representing the current table, accumulate @c used_table into the
9053     @c tables set, and call this function. To ensure correct handling of
9054     const expressions and outer references, add the const table map and
9055     OUTER_REF_TABLE_BIT to @c used_table for the first table. To ensure
9056     that random expressions are evaluated for the final table, add
9057     RAND_TABLE_BIT to @c used_table for the final table.
9058 
9059     The function assumes that constant, inexpensive parts of the condition
9060     have already been checked. Constant, expensive parts will be attached
9061     to the first table in the join order, provided that the above call
9062     sequence is followed.
9063 
9064     The call order will ensure that conditions covering tables in @c tables
9065     minus those in @c used_table, have already been checked.
9066 
9067     The function takes into account that some parts of the condition are
9068     guaranteed to be true by employed 'ref' access methods (the code that
9069     does this is located at the end, search down for "EQ_FUNC").
9070 
9071   @note
9072     make_cond_for_info_schema() uses an algorithm similar to
9073     make_cond_for_table().
9074 */
9075 
9076 Item *
make_cond_for_table(Item * cond,table_map tables,table_map used_table,bool exclude_expensive_cond)9077 make_cond_for_table(Item *cond, table_map tables, table_map used_table,
9078                     bool exclude_expensive_cond)
9079 {
9080   return make_cond_for_table_from_pred(cond, cond, tables, used_table,
9081                                        exclude_expensive_cond);
9082 }
9083 
9084 static Item *
make_cond_for_table_from_pred(Item * root_cond,Item * cond,table_map tables,table_map used_table,bool exclude_expensive_cond)9085 make_cond_for_table_from_pred(Item *root_cond, Item *cond,
9086                               table_map tables, table_map used_table,
9087                               bool exclude_expensive_cond)
9088 {
9089   /*
9090     Ignore this condition if
9091      1. We are extracting conditions for a specific table, and
9092      2. that table is not referenced by the condition, but not if
9093      3. this is a constant condition not checked at optimization time and
9094         this is the first table we are extracting conditions for.
9095        (Assuming that used_table == tables for the first table.)
9096   */
9097   if (used_table &&                                                 // 1
9098       !(cond->used_tables() & used_table) &&                        // 2
9099       !(cond->is_expensive() && used_table == tables))              // 3
9100     return NULL;
9101 
9102   if (cond->type() == Item::COND_ITEM)
9103   {
9104     if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
9105     {
9106       /* Create new top level AND item */
9107       Item_cond_and *new_cond= new Item_cond_and;
9108       if (!new_cond)
9109         return NULL;
9110       List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
9111       Item *item;
9112       while ((item= li++))
9113       {
9114         Item *fix= make_cond_for_table_from_pred(root_cond, item,
9115                                                  tables, used_table,
9116                                                  exclude_expensive_cond);
9117         if (fix)
9118           new_cond->argument_list()->push_back(fix);
9119       }
9120       switch (new_cond->argument_list()->elements) {
9121       case 0:
9122         return NULL;                          // Always true
9123       case 1:
9124         return new_cond->argument_list()->head();
9125       default:
9126         if (new_cond->fix_fields(current_thd, NULL))
9127           return NULL;
9128         return new_cond;
9129       }
9130     }
9131     else
9132     {                                         // Or list
9133       Item_cond_or *new_cond= new Item_cond_or;
9134       if (!new_cond)
9135         return NULL;
9136       List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
9137       Item *item;
9138       while ((item= li++))
9139       {
9140         Item *fix= make_cond_for_table_from_pred(root_cond, item,
9141                                                  tables, 0L,
9142                                                  exclude_expensive_cond);
9143 	if (!fix)
9144           return NULL;                        // Always true
9145 	new_cond->argument_list()->push_back(fix);
9146       }
9147       if (new_cond->fix_fields(current_thd, NULL))
9148         return NULL;
9149       return new_cond;
9150     }
9151   }
9152 
9153   /*
9154     Omit this condition if
9155      1. It has been marked as omittable before, or
9156      2. Some tables referred by the condition are not available, or
9157      3. We are extracting conditions for all tables, the condition is
9158         considered 'expensive', and we want to delay evaluation of such
9159         conditions to the execution phase.
9160   */
9161   if (cond->marker == 3 ||                                             // 1
9162       (cond->used_tables() & ~tables) ||                               // 2
9163       (!used_table && exclude_expensive_cond && cond->is_expensive())) // 3
9164     return NULL;
9165 
9166   /*
9167     Extract this condition if
9168      1. It has already been marked as applicable, or
9169      2. It is not a <comparison predicate> (=, <, >, <=, >=, <=>)
9170   */
9171   if (cond->marker == 2 ||                                             // 1
9172       cond->eq_cmp_result() == Item::COND_OK)                          // 2
9173     return cond;
9174 
9175   /*
9176     Remove equalities that are guaranteed to be true by use of 'ref' access
9177     method.
9178     Note that ref access implements "table1.field1 <=> table2.indexed_field2",
9179     i.e. if it passed a NULL field1, it will return NULL indexed_field2 if
9180     there are.
9181     Thus the equality "table1.field1 = table2.indexed_field2",
9182     is equivalent to "ref access AND table1.field1 IS NOT NULL"
9183     i.e. "ref access and proper setting/testing of ref->null_rejecting".
9184     Thus, we must be careful, that when we remove equalities below we also
9185     set ref->null_rejecting, and test it at execution; otherwise wrong NULL
9186     matches appear.
9187     So:
9188     - for the optimization phase, the code which is below, and the code in
9189     test_if_ref(), and in add_key_field(), must be kept in sync: if the
9190     applicability conditions in one place are relaxed, they should also be
9191     relaxed elsewhere.
9192     - for the execution phase, all possible execution methods must test
9193     ref->null_rejecting.
9194   */
9195   if (cond->type() == Item::FUNC_ITEM &&
9196       ((Item_func*) cond)->functype() == Item_func::EQ_FUNC)
9197   {
9198     Item *left_item= ((Item_func*) cond)->arguments()[0]->real_item();
9199     Item *right_item= ((Item_func*) cond)->arguments()[1]->real_item();
9200     if ((left_item->type() == Item::FIELD_ITEM &&
9201          test_if_ref(root_cond, (Item_field*) left_item, right_item)) ||
9202         (right_item->type() == Item::FIELD_ITEM &&
9203          test_if_ref(root_cond, (Item_field*) right_item, left_item)))
9204     {
9205       cond->marker= 3;                   // Condition can be omitted
9206       return NULL;
9207     }
9208   }
9209   cond->marker= 2;                      // Mark condition as applicable
9210   return cond;
9211 }
9212 
9213 
9214 /**
9215   Separates the predicates in a join condition and pushes them to the
9216   join step where all involved tables are available in the join prefix.
9217   ON clauses from JOIN expressions are also pushed to the most appropriate step.
9218 
9219   @param join Join object where predicates are pushed.
9220 
9221   @param cond Pointer to condition which may contain an arbitrary number of
9222               predicates, combined using AND, OR and XOR items.
9223               If NULL, equivalent to a predicate that returns TRUE for all
9224               row combinations.
9225 
9226 
9227   @retval true  Found impossible WHERE clause, or out-of-memory
9228   @retval false Other
9229 */
9230 
make_join_select(JOIN * join,Item * cond)9231 static bool make_join_select(JOIN *join, Item *cond)
9232 {
9233   THD *thd= join->thd;
9234   Opt_trace_context * const trace= &thd->opt_trace;
9235   DBUG_ENTER("make_join_select");
9236   ASSERT_BEST_REF_IN_JOIN_ORDER(join);
9237 
9238   // Add IS NOT NULL conditions to table conditions:
9239   add_not_null_conds(join);
9240 
9241   /*
9242     Extract constant conditions that are part of the WHERE clause.
9243     Constant parts of join conditions from outer joins are attached to
9244     the appropriate table condition in JOIN::attach_join_conditions().
9245   */
9246   if (cond)                /* Because of QUICK_GROUP_MIN_MAX_SELECT */
9247   {                        /* there may be a select without a cond. */
9248     if (join->primary_tables > 1)
9249       cond->update_used_tables();    // Table number may have changed
9250     if (join->plan_is_const() &&
9251         join->select_lex->master_unit() ==
9252         thd->lex->unit)             // The outer-most query block
9253       join->const_table_map|= RAND_TABLE_BIT;
9254   }
9255   /*
9256     Extract conditions that depend on constant tables.
9257     The const part of the query's WHERE clause can be checked immediately
9258     and if it is not satisfied then the join has empty result
9259   */
9260   Item *const_cond= NULL;
9261   if (cond)
9262     const_cond= make_cond_for_table(cond, join->const_table_map,
9263                                     (table_map) 0, true);
9264 
9265   // Add conditions added by add_not_null_conds()
9266   for (uint i= 0; i < join->const_tables; i++)
9267   {
9268     if (and_conditions(&const_cond, join->best_ref[i]->condition()))
9269       DBUG_RETURN(true);
9270   }
9271   DBUG_EXECUTE("where", print_where(const_cond, "constants", QT_ORDINARY););
9272   if (const_cond != NULL)
9273   {
9274     const bool const_cond_result= const_cond->val_int() != 0;
9275     if (thd->is_error())
9276       DBUG_RETURN(true);
9277 
9278     Opt_trace_object trace_const_cond(trace);
9279     trace_const_cond.add("condition_on_constant_tables", const_cond)
9280                     .add("condition_value", const_cond_result);
9281     if (!const_cond_result)
9282     {
9283       DBUG_PRINT("info",("Found impossible WHERE condition"));
9284       DBUG_RETURN(true);
9285     }
9286   }
9287 
9288   /*
9289     Extract remaining conditions from WHERE clause and join conditions,
9290     and attach them to the most appropriate table condition. This means that
9291     a condition will be evaluated as soon as all fields it depends on are
9292     available. For outer join conditions, the additional criterion is that
9293     we must have determined whether outer-joined rows are available, or
9294     have been NULL-extended, see JOIN::attach_join_conditions() for details.
9295   */
9296   {
9297     Opt_trace_object trace_wrapper(trace);
9298     Opt_trace_object
9299       trace_conditions(trace, "attaching_conditions_to_tables");
9300     trace_conditions.add("original_condition", cond);
9301     Opt_trace_array
9302       trace_attached_comp(trace, "attached_conditions_computation");
9303 
9304     for (uint i=join->const_tables ; i < join->tables ; i++)
9305     {
9306       JOIN_TAB *const tab= join->best_ref[i];
9307 
9308       if (!tab->position())
9309         continue;
9310       /*
9311         first_inner is the X in queries like:
9312         SELECT * FROM t1 LEFT OUTER JOIN (t2 JOIN t3) ON X
9313       */
9314       const plan_idx first_inner= tab->first_inner();
9315       const table_map used_tables= tab->prefix_tables();
9316       const table_map current_map= tab->added_tables();
9317       Item *tmp= NULL;
9318 
9319       if (cond)
9320         tmp= make_cond_for_table(cond,used_tables,current_map, 0);
9321       /* Add conditions added by add_not_null_conds(). */
9322       if (tab->condition() && and_conditions(&tmp, tab->condition()))
9323         DBUG_RETURN(true);
9324 
9325 
9326       if (cond && !tmp && tab->quick())
9327       {						// Outer join
9328         assert(tab->type() == JT_RANGE || tab->type() == JT_INDEX_MERGE);
9329         /*
9330           Hack to handle the case where we only refer to a table
9331           in the ON part of an OUTER JOIN. In this case we want the code
9332           below to check if we should use 'quick' instead.
9333         */
9334         DBUG_PRINT("info", ("Item_int"));
9335         tmp= new Item_int((longlong) 1,1);	// Always true
9336       }
9337       if (tmp || !cond || tab->type() == JT_REF || tab->type() == JT_REF_OR_NULL ||
9338           tab->type() == JT_EQ_REF || first_inner != NO_PLAN_IDX)
9339       {
9340         DBUG_EXECUTE("where",print_where(tmp,tab->table()->alias, QT_ORDINARY););
9341         /*
9342           If tab is an inner table of an outer join operation,
9343           add a match guard to the pushed down predicate.
9344           The guard will turn the predicate on only after
9345           the first match for outer tables is encountered.
9346 	*/
9347         if (cond && tmp)
9348         {
9349           /*
9350             Because of QUICK_GROUP_MIN_MAX_SELECT there may be a select without
9351             a cond, so neutralize the hack above.
9352           */
9353           if (!(tmp= add_found_match_trig_cond(join, first_inner, tmp, NO_PLAN_IDX)))
9354             DBUG_RETURN(true);
9355           tab->set_condition(tmp);
9356           /* Push condition to storage engine if this is enabled
9357              and the condition is not guarded */
9358 	  if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN) &&
9359               first_inner == NO_PLAN_IDX)
9360           {
9361             Item *push_cond=
9362               make_cond_for_table(tmp, tab->table_ref->map(),
9363                                   tab->table_ref->map(), 0);
9364             if (push_cond)
9365             {
9366               /* Push condition to handler */
9367               if (!tab->table()->file->cond_push(push_cond))
9368                 tab->table()->file->pushed_cond= push_cond;
9369             }
9370           }
9371         }
9372         else
9373         {
9374           tab->set_condition(NULL);
9375         }
9376 
9377         DBUG_EXECUTE("where",print_where(tmp,tab->table()->alias, QT_ORDINARY););
9378 
9379 	if (tab->quick())
9380 	{
9381           if (tab->needed_reg.is_clear_all() && tab->type() != JT_CONST)
9382           {
9383             /*
9384               We keep (for now) the QUICK AM calculated in
9385               get_quick_record_count().
9386             */
9387             assert(tab->quick()->is_valid());
9388 	  }
9389 	  else
9390           {
9391             delete tab->quick();
9392 	    tab->set_quick(NULL);
9393           }
9394 	}
9395 
9396         if ((tab->type() == JT_ALL || tab->type() == JT_RANGE ||
9397             tab->type() == JT_INDEX_MERGE || tab->type() == JT_INDEX_SCAN) &&
9398             tab->use_quick != QS_RANGE)
9399 	{
9400           /*
9401             We plan to scan (table/index/range scan).
9402             Check again if we should use an index. We can use an index if:
9403 
9404             1a) There is a condition that range optimizer can work on, and
9405             1b) There are non-constant conditions on one or more keys, and
9406             1c) Some of the non-constant fields may have been read
9407                 already. This may be the case if this is not the first
9408                 table in the join OR this is a subselect with
9409                 non-constant conditions referring to an outer table
9410                 (dependent subquery)
9411                 or,
9412             2a) There are conditions only relying on constants
9413             2b) This is the first non-constant table
9414             2c) There is a limit of rows to read that is lower than
9415                 the fanout for this table, predicate filters included
9416                 (i.e., the estimated number of rows that will be
9417                 produced for this table per row combination of
9418                 previous tables)
9419             2d) The query is NOT run with FOUND_ROWS() (because in that
9420                 case we have to scan through all rows to count them anyway)
9421           */
9422           enum { DONT_RECHECK, NOT_FIRST_TABLE, LOW_LIMIT }
9423           recheck_reason= DONT_RECHECK;
9424 
9425           assert(tab->const_keys.is_subset(tab->keys()));
9426 
9427           const join_type orig_join_type= tab->type();
9428           const QUICK_SELECT_I *const orig_quick= tab->quick();
9429 
9430           if (cond &&                                                // 1a
9431               (tab->keys() != tab->const_keys) &&                      // 1b
9432               (i > 0 ||                                              // 1c
9433                (join->select_lex->master_unit()->item &&
9434                 cond->used_tables() & OUTER_REF_TABLE_BIT)))
9435             recheck_reason= NOT_FIRST_TABLE;
9436           else if (!tab->const_keys.is_clear_all() &&                // 2a
9437                    i == join->const_tables &&                        // 2b
9438                    (join->unit->select_limit_cnt <
9439                     (tab->position()->rows_fetched *
9440                      tab->position()->filter_effect)) &&               // 2c
9441                    !join->calc_found_rows)                             // 2d
9442             recheck_reason= LOW_LIMIT;
9443 
9444           if (tab->position()->sj_strategy == SJ_OPT_LOOSE_SCAN)
9445           {
9446             /*
9447               Semijoin loose scan has settled for a certain index-based access
9448               method with suitable characteristics, don't substitute it.
9449             */
9450             recheck_reason= DONT_RECHECK;
9451           }
9452 
9453           if (recheck_reason != DONT_RECHECK)
9454           {
9455             Opt_trace_object trace_one_table(trace);
9456             trace_one_table.add_utf8_table(tab->table_ref);
9457             Opt_trace_object trace_table(trace, "rechecking_index_usage");
9458             if (recheck_reason == NOT_FIRST_TABLE)
9459               trace_table.add_alnum("recheck_reason", "not_first_table");
9460             else
9461               trace_table.add_alnum("recheck_reason", "low_limit").
9462                 add("limit", join->unit->select_limit_cnt).
9463                 add("row_estimate",
9464                     tab->position()->rows_fetched *
9465                     tab->position()->filter_effect);
9466 
9467             /* Join with outer join condition */
9468             Item *orig_cond= tab->condition();
9469             tab->and_with_condition(tab->join_cond());
9470 
9471             /*
9472               We can't call sel->cond->fix_fields,
9473               as it will break tab->join_cond() if it's AND condition
9474               (fix_fields currently removes extra AND/OR levels).
9475               Yet attributes of the just built condition are not needed.
9476               Thus we call sel->cond->quick_fix_field for safety.
9477             */
9478             if (tab->condition() && !tab->condition()->fixed)
9479               tab->condition()->quick_fix_field();
9480 
9481             key_map usable_keys= tab->keys();
9482             ORDER::enum_order interesting_order= ORDER::ORDER_NOT_RELEVANT;
9483 
9484             if (recheck_reason == LOW_LIMIT)
9485             {
9486               int read_direction= 0;
9487 
9488               /*
9489                 If the current plan is to use range, then check if the
9490                 already selected index provides the order dictated by the
9491                 ORDER BY clause.
9492               */
9493               if (tab->quick() && tab->quick()->index != MAX_KEY)
9494               {
9495                 const uint ref_key= tab->quick()->index;
9496 
9497                 read_direction= test_if_order_by_key(join->order,
9498                                                      tab->table(), ref_key);
9499                 /*
9500                   If the index provides order there is no need to recheck
9501                   index usage; we already know from the former call to
9502                   test_quick_select() that a range scan on the chosen
9503                   index is cheapest. Note that previous calls to
9504                   test_quick_select() did not take order direction
9505                   (ASC/DESC) into account, so in case of DESC ordering
9506                   we still need to recheck.
9507                 */
9508                 if ((read_direction == 1) ||
9509                     (read_direction == -1 && tab->quick()->reverse_sorted()))
9510                 {
9511                   recheck_reason= DONT_RECHECK;
9512                 }
9513               }
9514               /*
9515                 We do a cost based search for an ordering index here. Do this
9516                 only if prefer_ordering_index switch is on or an index is
9517                 forced for order by
9518               */
9519               if (recheck_reason != DONT_RECHECK &&
9520                   (tab->table()->force_index_order ||
9521                    thd->optimizer_switch_flag(
9522                        OPTIMIZER_SWITCH_PREFER_ORDERING_INDEX)))
9523               {
9524                 int best_key= -1;
9525                 ha_rows select_limit= join->unit->select_limit_cnt;
9526 
9527                 /* Use index specified in FORCE INDEX FOR ORDER BY, if any. */
9528                 if (tab->table()->force_index)
9529                   usable_keys.intersect(tab->table()->keys_in_use_for_order_by);
9530 
9531                 /* Do a cost based search on the indexes that give sort order */
9532                 test_if_cheaper_ordering(tab, join->order, tab->table(),
9533                                          usable_keys, -1, select_limit,
9534                                          &best_key, &read_direction,
9535                                          &select_limit);
9536                 if (best_key < 0)
9537                   recheck_reason= DONT_RECHECK; // No usable keys
9538                 else
9539                 {
9540                   // Only usable_key is the best_key chosen
9541                   usable_keys.clear_all();
9542                   usable_keys.set_bit(best_key);
9543                   interesting_order= (read_direction == -1 ? ORDER::ORDER_DESC :
9544                                       ORDER::ORDER_ASC);
9545                 }
9546               }
9547             }
9548 
9549             bool search_if_impossible= recheck_reason != DONT_RECHECK;
9550             if (search_if_impossible)
9551             {
9552               if (tab->quick())
9553               {
9554                 delete tab->quick();
9555                 tab->set_type(JT_ALL);
9556               }
9557               QUICK_SELECT_I *qck;
9558               search_if_impossible=
9559                 test_quick_select(thd, usable_keys,
9560                                   used_tables & ~tab->table_ref->map(),
9561                                   join->calc_found_rows ?
9562                                    HA_POS_ERROR :
9563                                    join->unit->select_limit_cnt,
9564                                   false,   // don't force quick range
9565                                   interesting_order, tab,
9566                                   tab->condition(),
9567                                   &tab->needed_reg, &qck,
9568                                   tab->table()->force_index) < 0;
9569               tab->set_quick(qck);
9570             }
9571             tab->set_condition(orig_cond);
9572             if (search_if_impossible)
9573             {
9574               /*
9575                 Before reporting "Impossible WHERE" for the whole query
9576                 we have to check isn't it only "impossible ON" instead
9577               */
9578               if (!tab->join_cond())
9579                 DBUG_RETURN(1);  // No ON, so it's really "impossible WHERE"
9580               Opt_trace_object trace_without_on(trace, "without_ON_clause");
9581               if (tab->quick())
9582               {
9583                 delete tab->quick();
9584                 tab->set_type(JT_ALL);
9585               }
9586               QUICK_SELECT_I *qck;
9587               const bool impossible_where=
9588                 test_quick_select(thd, tab->keys(),
9589                                   used_tables & ~tab->table_ref->map(),
9590                                   join->calc_found_rows ?
9591                                    HA_POS_ERROR :
9592                                    join->unit->select_limit_cnt,
9593                                   false,   //don't force quick range
9594                                   ORDER::ORDER_NOT_RELEVANT, tab,
9595                                   tab->condition(), &tab->needed_reg,
9596                                   &qck, tab->table()->force_index) < 0;
9597               tab->set_quick(qck);
9598               if (impossible_where)
9599                 DBUG_RETURN(1);			// Impossible WHERE
9600             }
9601 
9602             /*
9603               Access method changed. This is after deciding join order
9604               and access method for all other tables so the info
9605               updated below will not have any effect on the execution
9606               plan.
9607             */
9608             if (tab->quick())
9609               tab->set_type(calc_join_type(tab->quick()->get_type()));
9610 
9611           } // end of "if (recheck_reason != DONT_RECHECK)"
9612 
9613           if (!tab->table()->quick_keys.is_subset(tab->checked_keys) ||
9614               !tab->needed_reg.is_subset(tab->checked_keys))
9615           {
9616             tab->keys().merge(tab->table()->quick_keys);
9617             tab->keys().merge(tab->needed_reg);
9618 
9619             /*
9620               The logic below for assigning tab->use_quick is strange.
9621               It bases the decision of which access method to use
9622               (dynamic range, range, scan) based on seemingly
9623               unrelated information like the presense of another index
9624               with too bad selectivity to be used.
9625 
9626               Consider the following scenario:
9627 
9628               The join optimizer has decided to use join order
9629               (t1,t2), and 'tab' is currently t2. Further, assume that
9630               there is a join condition between t1 and t2 using some
9631               range operator (e.g. "t1.x < t2.y").
9632 
9633               It has been decided that a table scan is best for t2.
9634               make_join_select() then reran the range optimizer a few
9635               lines up because there is an index 't2.good_idx'
9636               covering the t2.y column. If 'good_idx' is the only
9637               index in t2, the decision below will be to use dynamic
9638               range. However, if t2 also has another index 't2.other'
9639               which the range access method can be used on but
9640               selectivity is bad (#rows estimate is high), then table
9641               scan is chosen instead.
9642 
9643               Thus, the choice of DYNAMIC RANGE vs SCAN depends on the
9644               presense of an index that has so bad selectivity that it
9645               will not be used anyway.
9646             */
9647             if (!tab->needed_reg.is_clear_all() &&
9648                 (tab->table()->quick_keys.is_clear_all() ||
9649                  (tab->quick() &&
9650                   (tab->quick()->records >= 100L))))
9651             {
9652               tab->use_quick= QS_DYNAMIC_RANGE;
9653               tab->set_type(JT_ALL);
9654             }
9655             else
9656               tab->use_quick= QS_RANGE;
9657           }
9658 
9659           if (tab->type() != orig_join_type ||
9660               tab->quick() != orig_quick)       // Access method changed
9661             tab->position()->filter_effect= COND_FILTER_STALE;
9662 
9663 	}
9664       }
9665 
9666       if (join->attach_join_conditions(i))
9667         DBUG_RETURN(true);
9668     }
9669     trace_attached_comp.end();
9670 
9671     /*
9672       In outer joins the loop above, in iteration for table #i, may push
9673       conditions to a table before #i. Thus, the processing below has to be in
9674       a separate loop:
9675     */
9676     Opt_trace_array trace_attached_summary(trace,
9677                                            "attached_conditions_summary");
9678     for (uint i= join->const_tables ; i < join->tables ; i++)
9679     {
9680       JOIN_TAB * const tab= join->best_ref[i];
9681       if (!tab->table())
9682         continue;
9683       Item * const cond= tab->condition();
9684       Opt_trace_object trace_one_table(trace);
9685       trace_one_table.add_utf8_table(tab->table_ref).
9686         add("attached", cond);
9687       if (cond &&
9688           cond->has_subquery() /* traverse only if needed */ )
9689       {
9690         /*
9691           Why we pass walk_subquery=false: imagine
9692           WHERE t1.col IN (SELECT * FROM t2
9693                              WHERE t2.col IN (SELECT * FROM t3)
9694           and tab==t1. The grandchild subquery (SELECT * FROM t3) should not
9695           be marked as "in condition of t1" but as "in condition of t2", for
9696           correct calculation of the number of its executions.
9697         */
9698         std::pair<SELECT_LEX *, int> pair_object(join->select_lex, i);
9699         cond->walk(&Item::inform_item_in_cond_of_tab,
9700                    Item::WALK_POSTFIX,
9701                    pointer_cast<uchar * const>(&pair_object));
9702       }
9703 
9704     }
9705   }
9706   DBUG_RETURN(0);
9707 }
9708 
9709 
9710 /**
9711   Remove the following expressions from ORDER BY and GROUP BY:
9712   Constant expressions @n
9713   Expression that only uses tables that are of type EQ_REF and the reference
9714   is in the ORDER list or if all refereed tables are of the above type.
9715 
9716   In the following, the X field can be removed:
9717   @code
9718   SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t1.a,t2.X
9719   SELECT * FROM t1,t2,t3 WHERE t1.a=t2.a AND t2.b=t3.b ORDER BY t1.a,t3.X
9720   @endcode
9721 
9722   These can't be optimized:
9723   @code
9724   SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t2.X,t1.a
9725   SELECT * FROM t1,t2 WHERE t1.a=t2.a AND t1.b=t2.b ORDER BY t1.a,t2.c
9726   SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t2.b,t1.a
9727   @endcode
9728 
9729   @param  JOIN         join object
9730   @param  start_order  clause being analyzed (ORDER BY, GROUP BY...)
9731   @param  tab          table
9732   @param  cached_eq_ref_tables  bitmap: bit Z is set if the table of map Z
9733   was already the subject of an eq_ref_table() call for the same clause; then
9734   the return value of this previous call can be found at bit Z of
9735   'eq_ref_tables'
9736   @param  eq_ref_tables see above.
9737 */
9738 
9739 static bool
eq_ref_table(JOIN * join,ORDER * start_order,JOIN_TAB * tab,table_map * cached_eq_ref_tables,table_map * eq_ref_tables)9740 eq_ref_table(JOIN *join, ORDER *start_order, JOIN_TAB *tab,
9741              table_map *cached_eq_ref_tables, table_map *eq_ref_tables)
9742 {
9743   /* We can skip const tables only if not an outer table */
9744   if (tab->type() == JT_CONST && tab->first_inner() == NO_PLAN_IDX)
9745     return true;
9746   if (tab->type() != JT_EQ_REF || tab->table()->is_nullable())
9747     return false;
9748 
9749   const table_map map= tab->table_ref->map();
9750   uint found= 0;
9751 
9752   for (Item **ref_item= tab->ref().items, **end= ref_item + tab->ref().key_parts ;
9753        ref_item != end ; ref_item++)
9754   {
9755     if (! (*ref_item)->const_item())
9756     {						// Not a const ref
9757       ORDER *order;
9758       for (order=start_order ; order ; order=order->next)
9759       {
9760 	if ((*ref_item)->eq(order->item[0],0))
9761 	  break;
9762       }
9763       if (order)
9764       {
9765         if (!(order->used & map))
9766         {
9767           found++;
9768           order->used|= map;
9769         }
9770 	continue;				// Used in ORDER BY
9771       }
9772       if (!only_eq_ref_tables(join, start_order, (*ref_item)->used_tables(),
9773                               cached_eq_ref_tables, eq_ref_tables))
9774         return false;
9775     }
9776   }
9777   /* Check that there was no reference to table before sort order */
9778   for (; found && start_order ; start_order=start_order->next)
9779   {
9780     if (start_order->used & map)
9781     {
9782       found--;
9783       continue;
9784     }
9785     if (start_order->depend_map & map)
9786       return false;
9787   }
9788   return true;
9789 }
9790 
9791 
9792 /// @see eq_ref_table()
9793 static bool
only_eq_ref_tables(JOIN * join,ORDER * order,table_map tables,table_map * cached_eq_ref_tables,table_map * eq_ref_tables)9794 only_eq_ref_tables(JOIN *join, ORDER *order, table_map tables,
9795                    table_map *cached_eq_ref_tables, table_map *eq_ref_tables)
9796 {
9797   tables&= ~PSEUDO_TABLE_BITS;
9798   for (JOIN_TAB **tab=join->map2table ; tables ; tab++, tables>>=1)
9799   {
9800     if (tables & 1)
9801     {
9802       const table_map map= (*tab)->table_ref->map();
9803       bool is_eq_ref;
9804       if (*cached_eq_ref_tables & map) // then there exists a cached bit
9805         is_eq_ref= *eq_ref_tables & map;
9806       else
9807       {
9808         is_eq_ref= eq_ref_table(join, order, *tab,
9809                                 cached_eq_ref_tables, eq_ref_tables);
9810         if (is_eq_ref)
9811           *eq_ref_tables|= map;
9812         else
9813           *eq_ref_tables&= ~map;
9814         *cached_eq_ref_tables|= map; // now there exists a cached bit
9815       }
9816       if (!is_eq_ref)
9817         return false;
9818     }
9819   }
9820   return true;
9821 }
9822 
9823 
9824 /**
9825   Check if an expression in ORDER BY or GROUP BY is a duplicate of a
9826   preceding expression.
9827 
9828   @param  first_order   the first expression in the ORDER BY or
9829                         GROUP BY clause
9830   @param  possible_dup  the expression that might be a duplicate of
9831                         another expression preceding it the ORDER BY
9832                         or GROUP BY clause
9833 
9834   @returns true if possible_dup is a duplicate, false otherwise
9835 */
duplicate_order(const ORDER * first_order,const ORDER * possible_dup)9836 static bool duplicate_order(const ORDER *first_order,
9837                             const ORDER *possible_dup)
9838 {
9839   const ORDER *order;
9840   for (order=first_order; order ; order=order->next)
9841   {
9842     if (order == possible_dup)
9843     {
9844       // all expressions preceding possible_dup have been checked.
9845       return false;
9846     }
9847     else
9848     {
9849       const Item *it1= order->item[0]->real_item();
9850       const Item *it2= possible_dup->item[0]->real_item();
9851 
9852       if (it1->eq(it2, 0))
9853         return true;
9854     }
9855   }
9856   return false;
9857 }
9858 
/**
  Remove all constants and check if ORDER only contains simple
  expressions.

  simple_order is set to 1 if sort_order only uses fields from head table
  and the head table is not a LEFT JOIN table.

  An element of the list is skipped (and, if change_list is set, unlinked)
  when one of the following holds:
   - it uses no non-constant table; a subquery in it is evaluated first
     (unless the statement is EXPLAIN) and marked as optimized away,
   - it duplicates an earlier element, see duplicate_order(),
   - const_expression_in_where() reports it for 'cond',
   - it does not use the first non-const table and only_eq_ref_tables()
     holds for the other non-const tables it uses.

  @note If plan_is_const() is true the function returns immediately:
        *simple_order is not written and nothing is traced.

  @param first_order            List of SORT or GROUP order
  @param cond                   WHERE statement
  @param change_list            Set to 1 if we should remove things from list.
                                If this is not set, then only simple_order is
                                calculated.
  @param simple_order           Set to 1 if we are only using simple expressions
  @param clause_type            "ORDER BY" etc for printing in optimizer trace

  @return
    Returns new sort order. With change_list set this is the head of the
    relinked list (NULL if no element was kept, or if the plan is const);
    otherwise it is first_order unchanged.
*/

ORDER *JOIN::remove_const(ORDER *first_order, Item *cond, bool change_list,
                          bool *simple_order, const char *clause_type)
{
  ASSERT_BEST_REF_IN_JOIN_ORDER(this);

  // Plain return: DBUG_ENTER has not been executed yet at this point.
  if (plan_is_const())
    return change_list ? 0 : first_order;		// No need to sort

  Opt_trace_context * const trace= &thd->opt_trace;
  Opt_trace_disable_I_S trace_disabled(trace, first_order == NULL);
  Opt_trace_object trace_wrapper(trace);
  Opt_trace_object trace_simpl(trace, "clause_processing");
  if (trace->is_started())
  {
    // Record the clause as it looks before any element is removed.
    trace_simpl.add_alnum("clause", clause_type);
    String str;
    st_select_lex::print_order(&str, first_order,
                               enum_query_type(QT_TO_SYSTEM_CHARSET |
                                               QT_SHOW_SELECT_NUMBER |
                                               QT_NO_DEFAULT_DB));
    trace_simpl.add_utf8("original_clause", str.ptr(), str.length());
  }
  Opt_trace_array trace_each_item(trace, "items");

  ORDER *order,**prev_ptr;
  // First table after the const tables in the join order.
  JOIN_TAB *const first_tab= best_ref[const_tables];
  table_map first_table= first_tab->table_ref->map();
  table_map not_const_tables= ~const_table_map;
  table_map ref;
  // Caches to avoid repeating eq_ref_table() calls, @see eq_ref_table()
  table_map eq_ref_tables= 0, cached_eq_ref_tables= 0;
  DBUG_ENTER("JOIN::remove_const");

  // prev_ptr is the link through which the next kept element gets attached.
  prev_ptr= &first_order;
  // Initial value; only cleared by the loop, or forced to 1 after it.
  *simple_order= !first_tab->join_cond();

  /* NOTE: A variable of not_const_tables ^ first_table; breaks gcc 2.7 */

  update_depend_map(first_order);
  for (order=first_order; order ; order=order->next)
  {
    Opt_trace_object trace_one_item(trace);
    trace_one_item.add("item", order->item[0]);
    table_map order_tables=order->item[0]->used_tables();
    if (order->item[0]->with_sum_func ||
        /*
          If the outer table of an outer join is const (either by itself or
          after applying WHERE condition), grouping on a field from such a
          table will be optimized away and filesort without temporary table
          will be used unless we prevent that now. Filesort is not fit to
          handle joins and the join condition is not applied. We can't detect
          the case without an expensive test, however, so we force temporary
          table for all queries containing more than one table, ROLLUP, and an
          outer join.
         */
        (primary_tables > 1 &&
         rollup.state == ROLLUP::STATE_INITED &&
         select_lex->outer_join))
      *simple_order= 0;                // Must do a temp table to sort
    else if (!(order_tables & not_const_tables))
    {
      // The expression uses no non-constant table.
      if (order->item[0]->has_subquery())
      {
        // Evaluate the subquery now, except under EXPLAIN.
        if (!thd->lex->is_explain())
        {
          Opt_trace_array trace_subselect(trace, "subselect_evaluation");
          order->item[0]->val_str(&order->item[0]->str_value);
        }
        order->item[0]->mark_subqueries_optimized_away();
      }
      trace_one_item.add("uses_only_constant_tables", true);
      continue;                        // skip const item
    }
    else if (duplicate_order(first_order, order))
    {
      /*
        If 'order' is a duplicate of an expression earlier in the
        ORDER/GROUP BY sequence, it can be removed from the ORDER BY
        or GROUP BY clause.
      */
      trace_one_item.add("duplicate_item", true);
      continue;
    }
    else if (order->in_field_list && order->item[0]->has_subquery())
      /*
        If the order item is a subquery that is also in the field
        list, a temp table should be used to avoid evaluating the
        subquery for each row both when a) creating a sort index and
        b) getting the value.
          Example: "SELECT (SELECT ... ) as a ... GROUP BY a;"
       */
      *simple_order= false;
    else
    {
      // Expressions with the RAND or outer-reference pseudo bits set are
      // kept and make the clause non-simple.
      if (order_tables & (RAND_TABLE_BIT | OUTER_REF_TABLE_BIT))
	*simple_order=0;
      else
      {
	if (cond && const_expression_in_where(cond,order->item[0]))
	{
          trace_one_item.add("equals_constant_in_where", true);
	  continue;
	}
	// ref: non-const tables other than the first table that are used here.
	if ((ref=order_tables & (not_const_tables ^ first_table)))
	{
	  if (!(order_tables & first_table) &&
              only_eq_ref_tables(this, first_order, ref,
                                 &cached_eq_ref_tables, &eq_ref_tables))
	  {
            trace_one_item.add("eq_ref_to_preceding_items", true);
	    continue;
	  }
	  *simple_order=0;			// Must do a temp table to sort
	}
      }
    }
    // Reached only for elements that are kept (no 'continue' above).
    if (change_list)
      *prev_ptr= order;				// use this entry
    prev_ptr= &order->next;
  }
  // Terminate the relinked list after the last kept element.
  if (change_list)
    *prev_ptr=0;
  if (prev_ptr == &first_order)			// Nothing to sort/group
    *simple_order=1;
  DBUG_PRINT("exit",("simple_order: %d",(int) *simple_order));

  trace_each_item.end();
  trace_simpl.add("resulting_clause_is_simple", *simple_order);
  // The resulting clause is only printed when the list could have changed.
  if (trace->is_started() && change_list)
  {
    String str;
    st_select_lex::print_order(&str, first_order,
                               enum_query_type(QT_TO_SYSTEM_CHARSET |
                                               QT_SHOW_SELECT_NUMBER |
                                               QT_NO_DEFAULT_DB));
    trace_simpl.add_utf8("resulting_clause", str.ptr(), str.length());
  }

  DBUG_RETURN(first_order);
}
10018 
10019 
10020 /**
10021   Optimize conditions by
10022 
10023      a) applying transitivity to build multiple equality predicates
10024         (MEP): if x=y and y=z the MEP x=y=z is built.
10025      b) apply constants where possible. If the value of x is known to be
10026         42, x is replaced with a constant of value 42. By transitivity, this
10027         also applies to MEPs, so the MEP in a) will become 42=x=y=z.
10028      c) remove conditions that are always false or always true
10029 
10030   @param thd              Thread handler
10031   @param[in,out] cond     WHERE or HAVING condition to optimize
10032   @param[out] cond_equal  The built multiple equalities
10033   @param join_list        list of join operations with join conditions
10034                           = NULL: Called for HAVING condition
10035   @param[out] cond_value  Not changed if cond was empty
10036                             COND_TRUE if cond is always true
10037                             COND_FALSE if cond is impossible
10038                             COND_OK otherwise
10039 
10040   @returns false if success, true if error
10041 */
10042 
optimize_cond(THD * thd,Item ** cond,COND_EQUAL ** cond_equal,List<TABLE_LIST> * join_list,Item::cond_result * cond_value)10043 bool optimize_cond(THD *thd, Item **cond, COND_EQUAL **cond_equal,
10044                    List<TABLE_LIST> *join_list,
10045                    Item::cond_result *cond_value)
10046 {
10047   Opt_trace_context * const trace= &thd->opt_trace;
10048   DBUG_ENTER("optimize_cond");
10049 
10050   Opt_trace_object trace_wrapper(trace);
10051   Opt_trace_object trace_cond(trace, "condition_processing");
10052   trace_cond.add_alnum("condition", join_list ? "WHERE" : "HAVING");
10053   trace_cond.add("original_condition", *cond);
10054   Opt_trace_array trace_steps(trace, "steps");
10055 
10056   /*
10057     Enter this function
10058     a) For a WHERE condition or a query having outer join.
10059     b) For a HAVING condition.
10060   */
10061   assert(*cond || join_list);
10062 
10063   /*
10064     Build all multiple equality predicates and eliminate equality
10065     predicates that can be inferred from these multiple equalities.
10066     For each reference of a field included into a multiple equality
10067     that occurs in a function set a pointer to the multiple equality
10068     predicate. Substitute a constant instead of this field if the
10069     multiple equality contains a constant.
10070     This is performed for the WHERE condition and any join conditions, but
10071     not for the HAVING condition.
10072   */
10073   if (join_list)
10074   {
10075     Opt_trace_object step_wrapper(trace);
10076     step_wrapper.add_alnum("transformation", "equality_propagation");
10077     {
10078       Opt_trace_disable_I_S
10079         disable_trace_wrapper(trace, !(*cond && (*cond)->has_subquery()));
10080       Opt_trace_array
10081         trace_subselect(trace, "subselect_evaluation");
10082       if (build_equal_items(thd, *cond, cond, NULL, true,
10083                             join_list, cond_equal))
10084         DBUG_RETURN(true);
10085     }
10086     step_wrapper.add("resulting_condition", *cond);
10087   }
10088   /* change field = field to field = const for each found field = const */
10089   if (*cond)
10090   {
10091     Opt_trace_object step_wrapper(trace);
10092     step_wrapper.add_alnum("transformation", "constant_propagation");
10093     {
10094       Opt_trace_disable_I_S
10095         disable_trace_wrapper(trace, !(*cond)->has_subquery());
10096       Opt_trace_array trace_subselect(trace, "subselect_evaluation");
10097       if (propagate_cond_constants(thd, NULL, *cond, *cond))
10098         DBUG_RETURN(true);
10099     }
10100     step_wrapper.add("resulting_condition", *cond);
10101   }
10102 
10103   /*
10104     Remove all instances of item == item
10105     Remove all and-levels where CONST item != CONST item
10106   */
10107   DBUG_EXECUTE("where",print_where(*cond,"after const change", QT_ORDINARY););
10108   if (*cond)
10109   {
10110     Opt_trace_object step_wrapper(trace);
10111     step_wrapper.add_alnum("transformation", "trivial_condition_removal");
10112     {
10113       Opt_trace_disable_I_S
10114         disable_trace_wrapper(trace, !(*cond)->has_subquery());
10115       Opt_trace_array trace_subselect(trace, "subselect_evaluation");
10116       if (remove_eq_conds(thd, *cond, cond, cond_value))
10117         DBUG_RETURN(true);
10118     }
10119     step_wrapper.add("resulting_condition", *cond);
10120   }
10121   assert(!thd->is_error());
10122   if (thd->is_error())
10123     DBUG_RETURN(true);
10124   DBUG_RETURN(false);
10125 }
10126 
10127 
10128 /**
10129   Handle the recursive job for remove_eq_conds()
10130 
10131   @param thd             Thread handler
10132   @param cond            the condition to handle.
10133   @param[out] retcond    Modified condition after removal
10134   @param[out] cond_value the resulting value of the condition
10135 
10136   @see remove_eq_conds() for more details on argument
10137 
10138   @returns false if success, true if error
10139 */
10140 
internal_remove_eq_conds(THD * thd,Item * cond,Item ** retcond,Item::cond_result * cond_value)10141 static bool internal_remove_eq_conds(THD *thd, Item *cond,
10142                                      Item **retcond,
10143                                      Item::cond_result *cond_value)
10144 {
10145   if (cond->type() == Item::COND_ITEM)
10146   {
10147     Item_cond *const item_cond= down_cast<Item_cond *>(cond);
10148     const bool and_level= item_cond->functype() == Item_func::COND_AND_FUNC;
10149     List_iterator<Item> li(*item_cond->argument_list());
10150     bool should_fix_fields= false;
10151 
10152     *cond_value=Item::COND_UNDEF;
10153     Item *item;
10154     while ((item=li++))
10155     {
10156       Item *new_item;
10157       Item::cond_result tmp_cond_value;
10158       if (internal_remove_eq_conds(thd, item, &new_item, &tmp_cond_value))
10159         return true;
10160 
10161       if (new_item == NULL)
10162         li.remove();
10163       else if (item != new_item)
10164       {
10165         (void) li.replace(new_item);
10166         should_fix_fields= true;
10167       }
10168       if (*cond_value == Item::COND_UNDEF)
10169          *cond_value= tmp_cond_value;
10170       switch (tmp_cond_value)
10171       {
10172       case Item::COND_OK:                       // Not TRUE or FALSE
10173         if (and_level || *cond_value == Item::COND_FALSE)
10174           *cond_value= tmp_cond_value;
10175         break;
10176       case Item::COND_FALSE:
10177         if (and_level)                          // Always false
10178         {
10179           *cond_value= tmp_cond_value;
10180           *retcond= NULL;
10181           return false;
10182         }
10183         break;
10184       case Item::COND_TRUE:
10185         if (!and_level)                         // Always true
10186         {
10187           *cond_value= tmp_cond_value;
10188           *retcond= NULL;
10189           return false;
10190         }
10191         break;
10192       case Item::COND_UNDEF:			// Impossible
10193         assert(false);                     /* purecov: deadcode */
10194       }
10195     }
10196     if (should_fix_fields)
10197       item_cond->update_used_tables();
10198 
10199     if (item_cond->argument_list()->elements == 0 ||
10200         *cond_value != Item::COND_OK)
10201     {
10202       *retcond= NULL;
10203       return false;
10204     }
10205     if (item_cond->argument_list()->elements == 1)
10206     {
10207       /*
10208         BUG#11765699:
10209         We're dealing with an AND or OR item that has only one
10210         argument. However, it is not an option to empty the list
10211         because:
10212 
10213          - this function is called for either JOIN::conds or
10214            JOIN::having, but these point to the same condition as
10215            SELECT_LEX::where and SELECT_LEX::having do.
10216 
10217          - The return value of remove_eq_conds() is assigned to
10218            JOIN::conds and JOIN::having, so emptying the list and
10219            returning the only remaining item "replaces" the AND or OR
10220            with item for the variables in JOIN. However, the return
10221            value is not assigned to the SELECT_LEX counterparts. Thus,
10222            if argument_list is emptied, SELECT_LEX forgets the item in
10223            argument_list()->head().
10224 
10225         item is therefore returned, but argument_list is not emptied.
10226       */
10227       item= item_cond->argument_list()->head();
10228       /*
10229         Consider reenabling the line below when the optimizer has been
10230         split into properly separated phases.
10231 
10232         item_cond->argument_list()->empty();
10233       */
10234       *retcond= item;
10235       return false;
10236     }
10237   }
10238   else if (cond->type() == Item::FUNC_ITEM &&
10239            down_cast<Item_func *>(cond)->functype() == Item_func::ISNULL_FUNC)
10240   {
10241     Item_func_isnull *const func= down_cast<Item_func_isnull *>(cond);
10242     Item **args= func->arguments();
10243     if (args[0]->type() == Item::FIELD_ITEM)
10244     {
10245       Field *const field= down_cast<Item_field *>(args[0])->field;
10246       /* fix to replace 'NULL' dates with '0' (shreeve@uci.edu) */
10247       /*
10248         See BUG#12594011
10249         Documentation says that
10250         SELECT datetime_notnull d FROM t1 WHERE d IS NULL
10251         shall return rows where d=='0000-00-00'
10252 
10253         Thus, for DATE and DATETIME columns defined as NOT NULL,
10254         "date_notnull IS NULL" has to be modified to
10255         "date_notnull IS NULL OR date_notnull == 0" (if outer join)
10256         "date_notnull == 0"                         (otherwise)
10257 
10258       */
10259       if (((field->type() == MYSQL_TYPE_DATE) ||
10260            (field->type() == MYSQL_TYPE_DATETIME)) &&
10261           (field->flags & NOT_NULL_FLAG))
10262       {
10263         Item *item0= new(thd->mem_root) Item_int((longlong)0, 1);
10264         if (item0 == NULL)
10265           return true;
10266         Item *eq_cond= new(thd->mem_root) Item_func_eq(args[0], item0);
10267         if (eq_cond == NULL)
10268           return true;
10269 
10270         if (args[0]->is_outer_field())
10271         {
10272           // outer join: transform "col IS NULL" to "col IS NULL or col=0"
10273           Item *or_cond= new(thd->mem_root) Item_cond_or(eq_cond, cond);
10274           if (or_cond == NULL)
10275             return true;
10276           cond= or_cond;
10277         }
10278         else
10279         {
10280           // not outer join: transform "col IS NULL" to "col=0"
10281           cond= eq_cond;
10282         }
10283 
10284         if (cond->fix_fields(thd, &cond))
10285           return true;
10286       }
10287     }
10288     if (cond->const_item())
10289     {
10290       bool value;
10291       if (eval_const_cond(thd, cond, &value))
10292         return true;
10293       *cond_value= value ? Item::COND_TRUE : Item::COND_FALSE;
10294       *retcond= NULL;
10295       return false;
10296     }
10297   }
10298   else if (cond->const_item() && !cond->is_expensive())
10299   {
10300     bool value;
10301     if (eval_const_cond(thd, cond, &value))
10302       return true;
10303     *cond_value= value ? Item::COND_TRUE : Item::COND_FALSE;
10304     *retcond= NULL;
10305     return false;
10306   }
10307   else
10308   {                                             // boolan compare function
10309     *cond_value= cond->eq_cmp_result();
10310     if (*cond_value == Item::COND_OK)
10311     {
10312       *retcond= cond;
10313       return false;
10314     }
10315     Item *left_item= down_cast<Item_func *>(cond)->arguments()[0];
10316     Item *right_item= down_cast<Item_func *>(cond)->arguments()[1];
10317     if (left_item->eq(right_item,1))
10318     {
10319       if (!left_item->maybe_null ||
10320           down_cast<Item_func *>(cond)->functype() == Item_func::EQUAL_FUNC)
10321       {
10322         *retcond= NULL;
10323         return false;                           // Compare of identical items
10324       }
10325     }
10326   }
10327   *cond_value= Item::COND_OK;
10328   *retcond= cond;                               // Point at next and level
10329   return false;
10330 }
10331 
10332 
10333 /**
10334   Remove const and eq items. Return new item, or NULL if no condition
10335 
10336   @param      thd        thread handler
10337   @param      cond       the condition to handle
10338   @param[out] retcond    condition after const removal
10339   @param[out] cond_value resulting value of the condition
10340               =COND_OK    condition must be evaluated (e.g field = constant)
10341               =COND_TRUE  always true                 (e.g 1 = 1)
10342               =COND_FALSE always false                (e.g 1 = 2)
10343 
10344   @note calls internal_remove_eq_conds() to check the complete tree.
10345 
10346   @returns false if success, true if error
10347 */
10348 
remove_eq_conds(THD * thd,Item * cond,Item ** retcond,Item::cond_result * cond_value)10349 bool remove_eq_conds(THD *thd, Item *cond, Item **retcond,
10350                      Item::cond_result *cond_value)
10351 {
10352   if (cond->type() == Item::FUNC_ITEM &&
10353       down_cast<Item_func *>(cond)->functype() == Item_func::ISNULL_FUNC)
10354   {
10355     /*
10356       Handles this special case for some ODBC applications:
10357       The are requesting the row that was just updated with a auto_increment
10358       value with this construct:
10359 
10360       SELECT * from table_name where auto_increment_column IS NULL
10361       This will be changed to:
10362       SELECT * from table_name where auto_increment_column = LAST_INSERT_ID
10363     */
10364 
10365     Item_func_isnull *const func= down_cast<Item_func_isnull *>(cond);
10366     Item **args= func->arguments();
10367     if (args[0]->type() == Item::FIELD_ITEM)
10368     {
10369       Field *const field= down_cast<Item_field *>(args[0])->field;
10370       if ((field->flags & AUTO_INCREMENT_FLAG) &&
10371           !field->table->is_nullable() &&
10372 	  (thd->variables.option_bits & OPTION_AUTO_IS_NULL) &&
10373 	  (thd->first_successful_insert_id_in_prev_stmt > 0 &&
10374            thd->substitute_null_with_insert_id))
10375       {
10376         query_cache.abort(&thd->query_cache_tls);
10377 
10378         cond= new Item_func_eq(
10379                 args[0],
10380                 new Item_int(NAME_STRING("last_insert_id()"),
10381                             thd->read_first_successful_insert_id_in_prev_stmt(),
10382                              MY_INT64_NUM_DECIMAL_DIGITS));
10383         if (cond == NULL)
10384           return true;
10385 
10386         if (cond->fix_fields(thd, &cond))
10387           return true;
10388 
10389         /*
10390           IS NULL should be mapped to LAST_INSERT_ID only for first row, so
10391           clear for next row
10392         */
10393         thd->substitute_null_with_insert_id= FALSE;
10394 
10395         *cond_value= Item::COND_OK;
10396         *retcond= cond;
10397         return false;
10398       }
10399     }
10400   }
10401   return internal_remove_eq_conds(thd, cond, retcond, cond_value);
10402 }
10403 
10404 
10405 /**
10406   Check if GROUP BY/DISTINCT can be optimized away because the set is
10407   already known to be distinct.
10408 
10409   Used in removing the GROUP BY/DISTINCT of the following types of
10410   statements:
10411   @code
10412     SELECT [DISTINCT] <unique_key_cols>... FROM <single_table_ref>
10413       [GROUP BY <unique_key_cols>,...]
10414   @endcode
10415 
10416     If (a,b,c is distinct)
10417     then <any combination of a,b,c>,{whatever} is also distinct
10418 
10419     This function checks if all the key parts of any of the unique keys
10420     of the table are referenced by a list : either the select list
10421     through find_field_in_item_list or GROUP BY list through
10422     find_field_in_order_list.
10423     If the above holds and the key parts cannot contain NULLs then we
10424     can safely remove the GROUP BY/DISTINCT,
10425     as no result set can be more distinct than an unique key.
10426 
10427   @param tab                  The join table to operate on.
10428   @param find_func            function to iterate over the list and search
10429                               for a field
10430 
10431   @retval
10432     1                    found
10433   @retval
10434     0                    not found.
10435 
10436   @note
10437     The function assumes that make_outerjoin_info() has been called in
10438     order for the check for outer tables to work.
10439 */
10440 
10441 static bool
list_contains_unique_index(JOIN_TAB * tab,bool (* find_func)(Field *,void *),void * data)10442 list_contains_unique_index(JOIN_TAB *tab,
10443                           bool (*find_func) (Field *, void *), void *data)
10444 {
10445   TABLE *table= tab->table();
10446 
10447   if (tab->is_inner_table_of_outer_join())
10448     return 0;
10449   for (uint keynr= 0; keynr < table->s->keys; keynr++)
10450   {
10451     if (keynr == table->s->primary_key ||
10452          (table->key_info[keynr].flags & HA_NOSAME))
10453     {
10454       KEY *keyinfo= table->key_info + keynr;
10455       KEY_PART_INFO *key_part, *key_part_end;
10456 
10457       for (key_part=keyinfo->key_part,
10458            key_part_end=key_part+ keyinfo->user_defined_key_parts;
10459            key_part < key_part_end;
10460            key_part++)
10461       {
10462         if (key_part->field->real_maybe_null() ||
10463             !find_func(key_part->field, data))
10464           break;
10465       }
10466       if (key_part == key_part_end)
10467         return 1;
10468     }
10469   }
10470   return 0;
10471 }
10472 
10473 
10474 /**
10475   Helper function for list_contains_unique_index.
10476   Find a field reference in a list of ORDER structures.
10477   Finds a direct reference of the Field in the list.
10478 
10479   @param field                The field to search for.
10480   @param data                 ORDER *.The list to search in
10481 
10482   @retval
10483     1                    found
10484   @retval
10485     0                    not found.
10486 */
10487 
10488 static bool
find_field_in_order_list(Field * field,void * data)10489 find_field_in_order_list (Field *field, void *data)
10490 {
10491   ORDER *group= (ORDER *) data;
10492   bool part_found= 0;
10493   for (ORDER *tmp_group= group; tmp_group; tmp_group=tmp_group->next)
10494   {
10495     Item *item= (*tmp_group->item)->real_item();
10496     if (item->type() == Item::FIELD_ITEM &&
10497         ((Item_field*) item)->field->eq(field))
10498     {
10499       part_found= 1;
10500       break;
10501     }
10502   }
10503   return part_found;
10504 }
10505 
10506 
10507 /**
10508   Helper function for list_contains_unique_index.
10509   Find a field reference in a dynamic list of Items.
10510   Finds a direct reference of the Field in the list.
10511 
10512   @param[in] field             The field to search for.
10513   @param[in] data              List<Item> *.The list to search in
10514 
10515   @retval
10516     1                    found
10517   @retval
10518     0                    not found.
10519 */
10520 
10521 static bool
find_field_in_item_list(Field * field,void * data)10522 find_field_in_item_list (Field *field, void *data)
10523 {
10524   List<Item> *fields= (List<Item> *) data;
10525   bool part_found= 0;
10526   List_iterator<Item> li(*fields);
10527   Item *item;
10528 
10529   while ((item= li++))
10530   {
10531     if (item->type() == Item::FIELD_ITEM &&
10532         ((Item_field*) item)->field->eq(field))
10533     {
10534       part_found= 1;
10535       break;
10536     }
10537   }
10538   return part_found;
10539 }
10540 
10541 
10542 /**
10543   Create a group by that consist of all non const fields.
10544 
10545   Try to use the fields in the order given by 'order' to allow one to
10546   optimize away 'order by'.
10547 */
10548 
10549 static ORDER *
create_distinct_group(THD * thd,Ref_ptr_array ref_pointer_array,ORDER * order_list,List<Item> & fields,List<Item> & all_fields,bool * all_order_by_fields_used)10550 create_distinct_group(THD *thd, Ref_ptr_array ref_pointer_array,
10551                       ORDER *order_list, List<Item> &fields,
10552                       List<Item> &all_fields,
10553 		      bool *all_order_by_fields_used)
10554 {
10555   List_iterator<Item> li(fields);
10556   Item *item;
10557   ORDER *order,*group,**prev;
10558 
10559   *all_order_by_fields_used= 1;
10560   while ((item=li++))
10561     item->marker=0;			/* Marker that field is not used */
10562 
10563   prev= &group;  group=0;
10564   for (order=order_list ; order; order=order->next)
10565   {
10566     if (order->in_field_list)
10567     {
10568       ORDER *ord=(ORDER*) thd->memdup((char*) order,sizeof(ORDER));
10569       if (!ord)
10570 	return 0;
10571       *prev=ord;
10572       prev= &ord->next;
10573       (*ord->item)->marker=1;
10574     }
10575     else
10576       *all_order_by_fields_used= 0;
10577   }
10578 
10579   li.rewind();
10580   while ((item=li++))
10581   {
10582     if (!item->const_item() && !item->with_sum_func && !item->marker)
10583     {
10584       /*
10585         Don't put duplicate columns from the SELECT list into the
10586         GROUP BY list.
10587       */
10588       ORDER *ord_iter;
10589       for (ord_iter= group; ord_iter; ord_iter= ord_iter->next)
10590         if ((*ord_iter->item)->eq(item, 1))
10591           goto next_item;
10592 
10593       ORDER *ord=(ORDER*) thd->mem_calloc(sizeof(ORDER));
10594       if (!ord)
10595 	return 0;
10596 
10597       if (item->type() == Item::FIELD_ITEM &&
10598           item->field_type() == MYSQL_TYPE_BIT)
10599       {
10600         /*
10601           Because HEAP tables can't index BIT fields we need to use an
10602           additional hidden field for grouping because later it will be
10603           converted to a LONG field. Original field will remain of the
10604           BIT type and will be returned to a client.
10605           @note setup_ref_array() needs to account for the extra space.
10606         */
10607         Item_field *new_item= new Item_field(thd, (Item_field*)item);
10608         ord->item= thd->lex->current_select()->add_hidden_item(new_item);
10609       }
10610       else
10611       {
10612         /*
10613           We have here only field_list (not all_field_list), so we can use
10614           simple indexing of ref_pointer_array (order in the array and in the
10615           list are same)
10616         */
10617         ord->item= &ref_pointer_array[0];
10618       }
10619       ord->direction= ORDER::ORDER_ASC;
10620       *prev=ord;
10621       prev= &ord->next;
10622     }
10623 next_item:
10624     ref_pointer_array.pop_front();
10625   }
10626   *prev=0;
10627   return group;
10628 }
10629 
10630 
10631 /**
10632   Return table number if there is only one table in sort order
10633   and group and order is compatible, else return 0.
10634 */
10635 
10636 static TABLE *
get_sort_by_table(ORDER * a,ORDER * b,TABLE_LIST * tables)10637 get_sort_by_table(ORDER *a,ORDER *b,TABLE_LIST *tables)
10638 {
10639   table_map map= (table_map) 0;
10640   DBUG_ENTER("get_sort_by_table");
10641 
10642   if (!a)
10643     a=b;					// Only one need to be given
10644   else if (!b)
10645     b=a;
10646 
10647   for (; a && b; a=a->next,b=b->next)
10648   {
10649     if (!(*a->item)->eq(*b->item,1))
10650       DBUG_RETURN(0);
10651     map|=a->item[0]->used_tables();
10652   }
10653   map&= ~PARAM_TABLE_BIT;
10654   if (!map || (map & (RAND_TABLE_BIT | OUTER_REF_TABLE_BIT)))
10655     DBUG_RETURN(0);
10656 
10657   for (; !(map & tables->map()); tables= tables->next_leaf) ;
10658   if (map != tables->map())
10659     DBUG_RETURN(0);				// More than one table
10660   DBUG_PRINT("exit",("sort by table: %d",tables->tableno()));
10661   DBUG_RETURN(tables->table);
10662 }
10663 
10664 
10665 /**
10666   Create a condition for a const reference for a table.
10667 
10668   @param thd      THD pointer
10669   @param join_tab pointer to the table
10670 
10671   @return A pointer to the created condition for the const reference.
10672   @retval !NULL if the condition was created successfully
10673   @retval NULL if an error has occured
10674 */
10675 
create_cond_for_const_ref(THD * thd,JOIN_TAB * join_tab)10676 static Item_cond_and *create_cond_for_const_ref(THD *thd, JOIN_TAB *join_tab)
10677 {
10678   DBUG_ENTER("create_cond_for_const_ref");
10679   assert(join_tab->ref().key_parts);
10680 
10681   TABLE *table= join_tab->table();
10682   Item_cond_and *cond= new Item_cond_and();
10683   if (!cond)
10684     DBUG_RETURN(NULL);
10685 
10686   for (uint i=0 ; i < join_tab->ref().key_parts ; i++)
10687   {
10688     Field *field= table->field[table->key_info[join_tab->ref().key].key_part[i].
10689                                fieldnr-1];
10690     Item *value= join_tab->ref().items[i];
10691     Item *item= new Item_field(field);
10692     if (!item)
10693       DBUG_RETURN(NULL);
10694     item= join_tab->ref().null_rejecting & ((key_part_map)1 << i) ?
10695             (Item *)new Item_func_eq(item, value) :
10696             (Item *)new Item_func_equal(item, value);
10697     if (!item)
10698       DBUG_RETURN(NULL);
10699     if (cond->add(item))
10700       DBUG_RETURN(NULL);
10701   }
10702   cond->fix_fields(thd, (Item**)&cond);
10703 
10704   DBUG_RETURN(cond);
10705 }
10706 
10707 /**
10708   Create a condition for a const reference and add this to the
10709   currenct select for the table.
10710 */
10711 
add_ref_to_table_cond(THD * thd,JOIN_TAB * join_tab)10712 static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab)
10713 {
10714   DBUG_ENTER("add_ref_to_table_cond");
10715   if (!join_tab->ref().key_parts)
10716     DBUG_RETURN(FALSE);
10717 
10718   int error= 0;
10719 
10720   /* Create a condition representing the const reference. */
10721   Item_cond_and *cond= create_cond_for_const_ref(thd, join_tab);
10722   if (!cond)
10723     DBUG_RETURN(TRUE);
10724 
10725   /* Add this condition to the existing select condtion */
10726   if (join_tab->condition())
10727   {
10728     error=(int) cond->add(join_tab->condition());
10729     cond->update_used_tables();
10730   }
10731   join_tab->set_condition(cond);
10732   Opt_trace_object(&thd->opt_trace).add("added_back_ref_condition", cond);
10733 
10734   DBUG_RETURN(error ? TRUE : FALSE);
10735 }
10736 
10737 
10738 /**
10739   Remove additional condition inserted by IN/ALL/ANY transformation.
10740 
10741   @param conds   condition for processing
10742 
10743   @return
10744     new conditions
10745 
10746   @note that this function has Bug#13915291.
10747 */
10748 
remove_additional_cond(Item * conds)10749 static Item *remove_additional_cond(Item* conds)
10750 {
10751   // Because it uses in_additional_cond it applies only to the scalar case.
10752   if (conds->item_name.ptr() == in_additional_cond)
10753     return 0;
10754   if (conds->type() == Item::COND_ITEM)
10755   {
10756     Item_cond *cnd= (Item_cond*) conds;
10757     List_iterator<Item> li(*(cnd->argument_list()));
10758     Item *item;
10759     while ((item= li++))
10760     {
10761       if (item->item_name.ptr() == in_additional_cond)
10762       {
10763 	li.remove();
10764 	if (cnd->argument_list()->elements == 1)
10765 	  return cnd->argument_list()->head();
10766 	return conds;
10767       }
10768     }
10769   }
10770   return conds;
10771 }
10772 
10773 
10774 /**
10775   Update some values in keyuse for faster choose_table_order() loop.
10776 
10777   @todo Check if this is the real meaning of ref_table_rows.
10778 
10779   @param keyuse_array  Array of Key_use elements being updated.
10780 
10781 
10782 */
10783 
optimize_keyuse()10784 void JOIN::optimize_keyuse()
10785 {
10786   for (size_t ix= 0; ix < keyuse_array.size(); ++ix)
10787   {
10788     Key_use *keyuse= &keyuse_array.at(ix);
10789     table_map map;
10790     /*
10791       If we find a ref, assume this table matches a proportional
10792       part of this table.
10793       For example 100 records matching a table with 5000 records
10794       gives 5000/100 = 50 records per key
10795       Constant tables are ignored.
10796       To avoid bad matches, we don't make ref_table_rows less than 100.
10797     */
10798     keyuse->ref_table_rows= ~(ha_rows) 0;	// If no ref
10799     if (keyuse->used_tables &
10800        (map= (keyuse->used_tables & ~const_table_map & ~PSEUDO_TABLE_BITS)))
10801     {
10802       uint tableno;
10803       for (tableno= 0; ! (map & 1) ; map>>=1, tableno++)
10804       {}
10805       if (map == 1)			// Only one table
10806       {
10807 	TABLE *tmp_table= join_tab[tableno].table();
10808 
10809 	keyuse->ref_table_rows= max<ha_rows>(tmp_table->file->stats.records, 100);
10810       }
10811     }
10812     /*
10813       Outer reference (external field) is constant for single executing
10814       of subquery
10815     */
10816     if (keyuse->used_tables == OUTER_REF_TABLE_BIT)
10817       keyuse->ref_table_rows= 1;
10818   }
10819 }
10820 
10821 /**
10822   Function sets FT hints, initializes FT handlers
10823   and checks if FT index can be used as covered.
10824 */
10825 
optimize_fts_query()10826 bool JOIN::optimize_fts_query()
10827 {
10828   ASSERT_BEST_REF_IN_JOIN_ORDER(this);
10829 
10830   assert(select_lex->has_ft_funcs());
10831 
10832   for (uint i= const_tables; i < tables; i++)
10833   {
10834     JOIN_TAB *tab= best_ref[i];
10835     if (tab->type() != JT_FT)
10836       continue;
10837 
10838     Item_func_match *ifm;
10839     Item_func_match* ft_func=
10840       static_cast<Item_func_match*>(tab->position()->key->val);
10841     List_iterator<Item_func_match> li(*(select_lex->ftfunc_list));
10842 
10843     while ((ifm= li++))
10844     {
10845       if (!(ifm->used_tables() & tab->table_ref->map()) || ifm->master)
10846         continue;
10847 
10848       if (ifm != ft_func)
10849       {
10850         if (ifm->can_skip_ranking())
10851           ifm->set_hints(this, FT_NO_RANKING, HA_POS_ERROR, false);
10852       }
10853     }
10854 
10855     /*
10856       Check if internal sorting is needed. FT_SORTED flag is set
10857       if no ORDER BY clause or ORDER BY MATCH function is the same
10858       as the function that is used for FT index and FT table is
10859       the first non-constant table in the JOIN.
10860     */
10861     if (i == const_tables &&
10862         !(ft_func->get_hints()->get_flags() & FT_BOOL) &&
10863         (!order || ft_func == test_if_ft_index_order(order)))
10864       ft_func->set_hints(this, FT_SORTED, m_select_limit, false);
10865 
10866     /*
10867       Check if ranking is not needed. FT_NO_RANKING flag is set if
10868       MATCH function is used only in WHERE condition and  MATCH
10869       function is not part of an expression.
10870     */
10871     if (ft_func->can_skip_ranking())
10872       ft_func->set_hints(this, FT_NO_RANKING,
10873                          !order ? m_select_limit : HA_POS_ERROR, false);
10874   }
10875 
10876   return init_ftfuncs(thd, select_lex);
10877 }
10878 
10879 
10880 /**
10881   Check if FTS index only access is possible.
10882 
10883   @param tab  pointer to JOIN_TAB structure.
10884 
10885   @return  TRUE if index only access is possible,
10886            FALSE otherwise.
10887 */
10888 
fts_index_access(JOIN_TAB * tab)10889 bool JOIN::fts_index_access(JOIN_TAB *tab)
10890 {
10891   assert(tab->type() == JT_FT);
10892   TABLE *table= tab->table();
10893 
10894   if ((table->file->ha_table_flags() & HA_CAN_FULLTEXT_EXT) == 0)
10895     return false; // Optimizations requires extended FTS support by table engine
10896 
10897   /*
10898     This optimization does not work with filesort nor GROUP BY
10899   */
10900   if (grouped || (order && ordered_index_usage != ordered_index_order_by))
10901     return false;
10902 
10903   /*
10904     Check whether the FTS result is covering.  If only document id
10905     and rank is needed, there is no need to access table rows.
10906   */
10907   for (uint i= bitmap_get_first_set(table->read_set);
10908        i < table->s->fields;
10909        i= bitmap_get_next_set(table->read_set, i))
10910   {
10911     if (table->field[i] != table->fts_doc_id_field ||
10912         !tab->ft_func()->docid_in_result())
10913     return false;
10914   }
10915 
10916   return true;
10917 }
10918 
10919 
10920 /**
10921    For {semijoin,subquery} materialization: calculates various cost
10922    information, based on a plan in join->best_positions covering the
10923    to-be-materialized query block and only this.
10924 
10925    @param join     JOIN where plan can be found
10926    @param sj_nest  sj materialization nest (NULL if subquery materialization)
10927    @param n_tables number of to-be-materialized tables
10928    @param[out] sjm where computed costs will be stored
10929 
10930    @note that this function modifies join->map2table, which has to be filled
10931    correctly later.
10932 */
calculate_materialization_costs(JOIN * join,TABLE_LIST * sj_nest,uint n_tables,Semijoin_mat_optimize * sjm)10933 static void calculate_materialization_costs(JOIN *join,
10934                                             TABLE_LIST *sj_nest,
10935                                             uint n_tables,
10936                                             Semijoin_mat_optimize *sjm)
10937 {
10938   double mat_cost;             // Estimated cost of materialization
10939   double mat_rowcount;         // Estimated row count before duplicate removal
10940   double distinct_rowcount;    // Estimated rowcount after duplicate removal
10941   List<Item> *inner_expr_list;
10942 
10943   if (sj_nest)
10944   {
10945     /*
10946       get_partial_join_cost() assumes a regular join, which is correct when
10947       we optimize a sj-materialization nest (always executed as regular
10948       join).
10949     */
10950     get_partial_join_cost(join, n_tables, &mat_cost, &mat_rowcount);
10951     n_tables+= join->const_tables;
10952     inner_expr_list= &sj_nest->nested_join->sj_inner_exprs;
10953   }
10954   else
10955   {
10956     mat_cost= join->best_read;
10957     mat_rowcount= static_cast<double>(join->best_rowcount);
10958     inner_expr_list= &join->select_lex->item_list;
10959   }
10960 
10961   /*
10962     Adjust output cardinality estimates. If the subquery has form
10963 
10964     ... oe IN (SELECT t1.colX, t2.colY, func(X,Y,Z) )
10965 
10966     then the number of distinct output record combinations has an
10967     upper bound of product of number of records matching the tables
10968     that are used by the SELECT clause.
10969     TODO:
10970     We can get a more precise estimate if we
10971      - use rec_per_key cardinality estimates. For simple cases like
10972      "oe IN (SELECT t.key ...)" it is trivial.
10973      - Functional dependencies between the tables in the semi-join
10974      nest (the payoff is probably less here?)
10975   */
10976   {
10977     for (uint i=0 ; i < n_tables ; i++)
10978     {
10979       JOIN_TAB * const tab= join->best_positions[i].table;
10980       join->map2table[tab->table_ref->tableno()]= tab;
10981     }
10982     List_iterator<Item> it(*inner_expr_list);
10983     Item *item;
10984     table_map map= 0;
10985     while ((item= it++))
10986       map|= item->used_tables();
10987     map&= ~PSEUDO_TABLE_BITS;
10988     Table_map_iterator tm_it(map);
10989     int tableno;
10990     double rows= 1.0;
10991     while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
10992       rows*= join->map2table[tableno]->table()->quick_condition_rows;
10993     distinct_rowcount= min(mat_rowcount, rows);
10994   }
10995   /*
10996     Calculate temporary table parameters and usage costs
10997   */
10998   const uint rowlen= get_tmp_table_rec_length(*inner_expr_list);
10999 
11000   const Cost_model_server *cost_model= join->cost_model();
11001 
11002   Cost_model_server::enum_tmptable_type tmp_table_type;
11003   if (rowlen * distinct_rowcount < join->thd->variables.max_heap_table_size)
11004     tmp_table_type= Cost_model_server::MEMORY_TMPTABLE;
11005   else
11006     tmp_table_type= Cost_model_server::DISK_TMPTABLE;
11007 
11008   /*
11009     Let materialization cost include the cost to create the temporary
11010     table and write the rows into it:
11011   */
11012   mat_cost+= cost_model->tmptable_create_cost(tmp_table_type);
11013   mat_cost+= cost_model->tmptable_readwrite_cost(tmp_table_type, mat_rowcount,
11014                                                  0.0);
11015 
11016   sjm->materialization_cost.reset();
11017   sjm->materialization_cost.add_io(mat_cost);
11018 
11019   sjm->expected_rowcount= distinct_rowcount;
11020 
11021   /*
11022     Set the cost to do a full scan of the temptable (will need this to
11023     consider doing sjm-scan):
11024   */
11025   sjm->scan_cost.reset();
11026   if (distinct_rowcount > 0.0)
11027   {
11028     const double scan_cost=
11029       cost_model->tmptable_readwrite_cost(tmp_table_type,
11030                                           0.0, distinct_rowcount);
11031     sjm->scan_cost.add_io(scan_cost);
11032   }
11033 
11034   // The cost to lookup a row in temp. table
11035   const double row_cost= cost_model->tmptable_readwrite_cost(tmp_table_type,
11036                                                              0.0, 1.0);
11037   sjm->lookup_cost.reset();
11038   sjm->lookup_cost.add_io(row_cost);
11039 }
11040 
11041 
11042 /**
11043    Decides between EXISTS and materialization; performs last steps to set up
11044    the chosen strategy.
11045    @returns 'false' if no error
11046 
11047    @note If UNION this is called on each contained JOIN.
11048 
11049  */
decide_subquery_strategy()11050 bool JOIN::decide_subquery_strategy()
11051 {
11052   assert(unit->item);
11053 
11054   switch (unit->item->substype())
11055   {
11056   case Item_subselect::IN_SUBS:
11057   case Item_subselect::ALL_SUBS:
11058   case Item_subselect::ANY_SUBS:
11059     // All of those are children of Item_in_subselect and may use EXISTS
11060     break;
11061   default:
11062     return false;
11063   }
11064 
11065   Item_in_subselect * const in_pred=
11066     static_cast<Item_in_subselect *>(unit->item);
11067 
11068   Item_exists_subselect::enum_exec_method chosen_method= in_pred->exec_method;
11069   // Materialization does not allow UNION so this can't happen:
11070   assert(chosen_method != Item_exists_subselect::EXEC_MATERIALIZATION);
11071 
11072   if ((chosen_method == Item_exists_subselect::EXEC_EXISTS_OR_MAT) &&
11073       compare_costs_of_subquery_strategies(&chosen_method))
11074     return true;
11075 
11076   switch (chosen_method)
11077   {
11078   case Item_exists_subselect::EXEC_EXISTS:
11079     return in_pred->finalize_exists_transform(select_lex);
11080   case Item_exists_subselect::EXEC_MATERIALIZATION:
11081     return in_pred->finalize_materialization_transform(this);
11082   default:
11083     assert(false);
11084     return true;
11085   }
11086 }
11087 
11088 
11089 /**
11090    Tells what is the cheapest between IN->EXISTS and subquery materialization,
11091    in terms of cost, for the subquery's JOIN.
11092    Input:
11093    - join->{best_positions,best_read,best_rowcount} must contain the
11094    execution plan of EXISTS (where 'join' is the subquery's JOIN)
11095    - join2->{best_positions,best_read,best_rowcount} must be correctly set
11096    (where 'join2' is the parent join, the grandparent join, etc).
11097    Output:
11098    join->{best_positions,best_read,best_rowcount} contain the cheapest
11099    execution plan (where 'join' is the subquery's JOIN).
11100 
11101    This plan choice has to happen before calling functions which set up
11102    execution structures, like JOIN::get_best_combination().
11103 
11104    @param[out] method  chosen method (EXISTS or materialization) will be put
11105                        here.
11106    @returns false if success
11107 */
bool JOIN::compare_costs_of_subquery_strategies(
               Item_exists_subselect::enum_exec_method *method)
{
  // Default answer; overridden at the very end only if materialization wins.
  *method= Item_exists_subselect::EXEC_EXISTS;

  Item_exists_subselect::enum_exec_method allowed_strategies=
    select_lex->subquery_strategy(thd);

  // Only EXISTS is allowed: there is nothing to compare.
  if (allowed_strategies == Item_exists_subselect::EXEC_EXISTS)
    return false;

  assert(allowed_strategies == Item_exists_subselect::EXEC_EXISTS_OR_MAT ||
         allowed_strategies == Item_exists_subselect::EXEC_MATERIALIZATION);

  /*
    Without a parent join, or if the parent has flagged that its child
    subqueries cannot be materialized, keep the EXISTS default.
  */
  const JOIN *parent_join= unit->outer_select()->join;
  if (!parent_join || !parent_join->child_subquery_can_materialize)
    return false;

  Item_in_subselect * const in_pred=
    static_cast<Item_in_subselect *>(unit->item);

  /*
    Testing subquery_allows_materialization() at each optimization is
    necessary as each execution of a prepared statement may use a different
    type of parameter.
  */
  if (!subquery_allows_materialization(in_pred, thd, select_lex,
                                       select_lex->outer_select()))
    return false;

  Opt_trace_context * const trace= &thd->opt_trace;
  Opt_trace_object trace_wrapper(trace);
  Opt_trace_object
    trace_subqmat(trace, "execution_plan_for_potential_materialization");
  /*
    Remember the plan currently held by this JOIN: its cost is used as the
    cost of one EXISTS evaluation below, and it is put back if EXISTS is
    finally kept.
  */
  const double saved_best_read= best_read;
  const ha_rows saved_best_rowcount= best_rowcount;
  POSITION * const saved_best_pos= best_positions;

  if (in_pred->in2exists_added_to_where())
  {
    Opt_trace_array trace_subqmat_steps(trace, "steps");

    // Up to one extra slot per semi-join nest is needed (if materialized)
    const uint sj_nests= select_lex->sj_nests.elements;

    /*
      Use a fresh array for the new plan; presumably this keeps the array
      saved in saved_best_pos intact for a possible restore - confirm.
    */
    if (!(best_positions= new (thd->mem_root) POSITION[tables + sj_nests]))
      return true;

    // Compute plans which do not use outer references

    /*
      NOTE(review): allow_outer_refs is not set back to true anywhere in this
      function, not even when EXISTS is finally chosen; also, the error
      returns below leave best_positions pointing to the new array. Confirm
      that callers expect this.
    */
    assert(allow_outer_refs);
    allow_outer_refs= false;

    if (optimize_semijoin_nests_for_materialization(this))
      return true;

    if (Optimize_table_order(thd, this, NULL).choose_table_order())
      return true;
  }
  else
  {
    /*
      If IN->EXISTS didn't add any condition to WHERE (only to HAVING, which
      can happen if subquery has aggregates) then the plan for materialization
      will be the same as for EXISTS - don't compute it again.
    */
    trace_subqmat.add("surely_same_plan_as_EXISTS", true).
      add_alnum("cause", "EXISTS_did_not_change_WHERE");
  }

  // sjm receives the materialization and lookup costs which are read below.
  Semijoin_mat_optimize sjm;
  calculate_materialization_costs(this, NULL, primary_tables, &sjm);

  /*
    The number of evaluations of the subquery influences costs, we need to
    compute it.
    The loop below walks up the chain of enclosing query blocks, multiplying
    subq_executions by the fanout of each of them, until it reaches a parent
    which is executed only once (or has no join).
  */
  Opt_trace_object trace_subq_mat_decision(trace, "subq_mat_decision");
  Opt_trace_array trace_parents(trace, "parent_fanouts");
  const Item_subselect *subs= in_pred;
  double subq_executions= 1.0;
  for(;;)
  {
    Opt_trace_object trace_parent(trace);
    trace_parent.add_select_number(parent_join->select_lex->select_number);
    double parent_fanout;
    if (// safety, not sure needed
        parent_join->plan_is_const() ||
        // if subq is in condition on constant table:
        !parent_join->child_subquery_can_materialize)
    {
      parent_fanout= 1.0;
      trace_parent.add("subq_attached_to_const_table", true);
    }
    else
    {
      if (subs->in_cond_of_tab != NO_PLAN_IDX)
      {
        /*
          Subquery is attached to a certain 'pos', pos[-1].prefix_rowcount
          is the number of times we'll start a loop accessing 'pos'; each such
          loop will read pos->rows_fetched rows of 'pos', so subquery will
          be evaluated pos[-1].prefix_rowcount * pos->rows_fetched times.
          Exceptions:
          - if 'pos' is first, use 1.0 instead of pos[-1].prefix_rowcount
          - if 'pos' is first of a sj-materialization nest, same.

          If in a sj-materialization nest, pos->rows_fetched and
          pos[-1].prefix_rowcount are of the "nest materialization" plan
          (copied back in fix_semijoin_strategies()), which is
          appropriate as it corresponds to evaluations of our subquery.

          pos->prefix_rowcount is not suitable because if we have:
          select ... from ot1 where ot1.col in
            (select it1.col1 from it1 where it1.col2 not in (subq));
          and subq does subq-mat, and plan is ot1 - it1+firstmatch(ot1),
          then:
          - t1.prefix_rowcount==1 (due to firstmatch)
          - subq is attached to it1, and is evaluated for each row read from
            t1, potentially way more than 1.
       */
        const uint idx= subs->in_cond_of_tab;
        assert((int)idx >= 0 && idx < parent_join->tables);
        trace_parent.add("subq_attached_to_table", true);
        QEP_TAB *const parent_tab= &parent_join->qep_tab[idx];
        trace_parent.add_utf8_table(parent_tab->table_ref);
        parent_fanout= parent_tab->position()->rows_fetched;
        /*
          Multiply by the row count of the preceding table only if there is
          a preceding non-const table and this table does not use a
          semi-join materialization strategy (the two exceptions above).
        */
        if ((idx > parent_join->const_tables) &&
            !sj_is_materialize_strategy(parent_tab->position()->sj_strategy))
          parent_fanout*=
            parent_tab[-1].position()->prefix_rowcount;
      }
      else
      {
        /*
          Subquery is in SELECT list, GROUP BY, ORDER BY or HAVING: it is
          evaluated at the end of the parent join's execution.
          It can be evaluated once per row-before-grouping:
          SELECT SUM(t1.col IN (subq)) FROM t1 GROUP BY expr;
          or once per row-after-grouping:
          SELECT SUM(t1.col) AS s FROM t1 GROUP BY expr HAVING s IN (subq),
          SELECT SUM(t1.col) IN (subq) FROM t1 GROUP BY expr
          It's hard to tell. We simply assume 'once per
          row-before-grouping'.

          Another approximation:
          SELECT ... HAVING x IN (subq) LIMIT 1
          best_rowcount=1 due to LIMIT, though HAVING (and thus the subquery)
          may be evaluated many times before HAVING becomes true and the limit
          is reached.
        */
        trace_parent.add("subq_attached_to_join_result", true);
        parent_fanout= static_cast<double>(parent_join->best_rowcount);
      }
    }
    subq_executions*= parent_fanout;
    trace_parent.add("fanout", parent_fanout);
    const bool cacheable= parent_join->select_lex->is_cacheable();
    trace_parent.add("cacheable", cacheable);
    if (cacheable)
    {
      // Parent executed only once
      break;
    }
    /*
      Parent query is executed once per outer row => go up to find number of
      outer rows. Example:
      SELECT ... IN(subq-with-in2exists WHERE ... IN (subq-with-mat))
    */
    if (!(subs= parent_join->unit->item))
    {
      // derived table, materialized only once
      break;
    }
    parent_join= parent_join->unit->outer_select()->join;
    if (!parent_join)
    {
      /*
        May be single-table UPDATE/DELETE, has no join.
        @todo  we should find how many rows it plans to UPDATE/DELETE, taking
        inspiration in Explain_table::explain_rows_and_filtered().
        This is not a priority as it applies only to
        UPDATE - child(non-mat-subq) - grandchild(may-be-mat-subq).
        And it will autosolve the day UPDATE gets a JOIN.
      */
      break;
    }
  }  // for(;;)
  /*
    Presumably closes the "parent_fanouts" array so that the fields added
    below go to "subq_mat_decision" - confirm against Opt_trace_struct.
  */
  trace_parents.end();

  // EXISTS: the saved plan is executed once per evaluation of the subquery.
  const double cost_exists= subq_executions * saved_best_read;
  // Materialization: fill the table once, then one lookup per evaluation.
  const double cost_mat_table= sjm.materialization_cost.total_cost();
  const double cost_mat= cost_mat_table + subq_executions *
    sjm.lookup_cost.total_cost();
  /*
    If both strategies are allowed, pick the cheaper one; otherwise only
    materialization is allowed (see assert above), so it is chosen
    regardless of the costs.
  */
  const bool mat_chosen=
    (allowed_strategies == Item_exists_subselect::EXEC_EXISTS_OR_MAT) ?
    (cost_mat < cost_exists) : true;
  trace_subq_mat_decision
    .add("cost_to_create_and_fill_materialized_table",
         cost_mat_table)
    .add("cost_of_one_EXISTS", saved_best_read)
    .add("number_of_subquery_evaluations", subq_executions)
    .add("cost_of_materialization", cost_mat)
    .add("cost_of_EXISTS", cost_exists)
    .add("chosen", mat_chosen);
  if (mat_chosen)
    *method= Item_exists_subselect::EXEC_MATERIALIZATION;
  else
  {
    // EXISTS is kept: put back the plan which was saved at the start.
    best_read= saved_best_read;
    best_rowcount= saved_best_rowcount;
    best_positions= saved_best_pos;
    /*
      Don't restore JOIN::positions or best_ref, they're not used
      afterwards. best_positions is (like: by get_sj_strategy()).
    */
  }
  return false;
}
11326 
11327 
11328 /**
11329   Optimize rollup specification.
11330 
11331   Allocate objects needed for rollup processing.
11332 
11333   @returns false if success, true if error.
11334 */
11335 
optimize_rollup()11336 bool JOIN::optimize_rollup()
11337 {
11338   tmp_table_param.quick_group= 0;	// Can't create groups in tmp table
11339   rollup.state= ROLLUP::STATE_INITED;
11340 
11341   /*
11342     Create pointers to the different sum function groups
11343     These are updated by rollup_make_fields()
11344   */
11345   tmp_table_param.group_parts= send_group_parts;
11346   /*
11347     substitute_gc() might substitute an expression in the GROUP BY list with
11348     a generated column. In such case the GC is added to the all_fields as a
11349     hidden field. In total, all_fields list could be grown by up to
11350     send_group_parts columns. Reserve space for them here.
11351   */
11352   const uint ref_array_size= all_fields.elements + send_group_parts;
11353 
11354   Item_null_result **null_items=
11355     static_cast<Item_null_result**>(thd->alloc(sizeof(Item*)*send_group_parts));
11356 
11357   rollup.null_items= Item_null_array(null_items, send_group_parts);
11358   rollup.ref_pointer_arrays=
11359     static_cast<Ref_ptr_array*>
11360     (thd->alloc((sizeof(Ref_ptr_array) +
11361                  ref_array_size * sizeof(Item*)) * send_group_parts));
11362   rollup.fields=
11363     static_cast<List<Item>*>(thd->alloc(sizeof(List<Item>) * send_group_parts));
11364 
11365   if (!null_items || !rollup.ref_pointer_arrays || !rollup.fields)
11366     return true;
11367 
11368   Item **ref_array= (Item**) (rollup.ref_pointer_arrays+send_group_parts);
11369 
11370   /*
11371     Prepare space for field list for the different levels
11372     These will be filled up in rollup_make_fields()
11373   */
11374   ORDER *group= group_list;
11375   for (uint i= 0; i < send_group_parts; i++, group= group->next)
11376   {
11377     rollup.null_items[i]=
11378       new (thd->mem_root) Item_null_result((*group->item)->field_type(),
11379                                            (*group->item)->result_type());
11380     if (rollup.null_items[i] == NULL)
11381       return true;           /* purecov: inspected */
11382     List<Item> *rollup_fields= &rollup.fields[i];
11383     rollup_fields->empty();
11384     rollup.ref_pointer_arrays[i]= Ref_ptr_array(ref_array, ref_array_size);
11385     ref_array+= ref_array_size;
11386   }
11387   for (uint i= 0; i < send_group_parts; i++)
11388   {
11389     for (uint j= 0; j < fields_list.elements; j++)
11390       rollup.fields[i].push_back(rollup.null_items[i]);
11391   }
11392   return false;
11393 }
11394 
11395 
11396 /**
11397   Refine the best_rowcount estimation based on what happens after tables
11398   have been joined: LIMIT and type of result sink.
11399  */
refine_best_rowcount()11400 void JOIN::refine_best_rowcount()
11401 {
11402   // If plan is const, 0 or 1 rows should be returned
11403   assert(!plan_is_const() || best_rowcount <= 1);
11404 
11405   if (plan_is_const())
11406     return;
11407 
11408   /*
11409     If a derived table, or a member of a UNION which itself forms a derived
11410     table:
11411     setting estimate to 0 or 1 row would mark the derived table as const.
11412     The row count is bumped to the nearest higher value, so that the
11413     query block will not be evaluated during optimization.
11414   */
11415   if (best_rowcount <= 1 &&
11416       select_lex->master_unit()->first_select()->linkage ==
11417       DERIVED_TABLE_TYPE)
11418     best_rowcount= 2;
11419 
11420   /*
11421     There will be no more rows than defined in the LIMIT clause. Use it
11422     as an estimate. If LIMIT 1 is specified, the query block will be
11423     considered "const", with actual row count 0 or 1.
11424   */
11425   set_if_smaller(best_rowcount, unit->select_limit_cnt);
11426 }
11427 
11428 /**
11429   @} (end of group Query_Optimizer)
11430 */
11431 
11432 /**
11433   This function is used to get the key length of Item object on
11434   which one tmp field will be created during create_tmp_table.
11435   This function references KEY_PART_INFO::init_from_field().
11436 
11437   @param item  A inner item of outer join
11438 
11439   @return  The length of a item to be as a key of a temp table
11440 */
11441 
get_key_length_tmp_table(Item * item)11442 static uint32 get_key_length_tmp_table(Item *item)
11443 {
11444   uint32 len= 0;
11445 
11446   item= item->real_item();
11447   if (item->type() == Item::FIELD_ITEM)
11448     len= ((Item_field *)item)->field->key_length();
11449   else
11450     len= item->max_length;
11451 
11452   if (item->maybe_null)
11453     len+= HA_KEY_NULL_LENGTH;
11454 
11455   // references KEY_PART_INFO::init_from_field()
11456   enum_field_types type= item->field_type();
11457   if (type == MYSQL_TYPE_BLOB ||
11458       type == MYSQL_TYPE_VARCHAR ||
11459       type == MYSQL_TYPE_GEOMETRY)
11460     len+= HA_KEY_BLOB_LENGTH;
11461 
11462   return len;
11463 }
11464 
11465