1 /* Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
22 
23 /**
24   @file
25 
26   @brief
27   mysql_select and join optimization
28 
29 
30   @defgroup Query_Optimizer  Query Optimizer
31   @{
32 */
33 
34 #include "sql_select.h"
35 #include "sql_optimizer.h"
36 #include "sql_resolver.h"                  // subquery_allows_materialization
37 #include "sql_executor.h"
38 #include "sql_planner.h"
39 #include "debug_sync.h"          // DEBUG_SYNC
40 #include "opt_trace.h"
41 #include "sql_derived.h"
42 #include "sql_test.h"
43 #include "sql_base.h"
44 #include "sql_parse.h"
45 #include "my_bit.h"
46 #include "lock.h"
47 #include "abstract_query_plan.h"
48 #include "opt_explain_format.h"  // Explain_format_flags
49 
50 #include <algorithm>
51 using std::max;
52 using std::min;
53 
/*
  Forward declarations of file-local (static) helper functions.
  Only the prototypes are visible here; the definitions are not part of
  this section of the file. Several of these helpers are called from
  JOIN::optimize() below (for example make_join_statistics(),
  simplify_joins(), record_join_nest_info(), remove_const() and
  create_distinct_group()).
*/
54 static bool make_join_statistics(JOIN *join, TABLE_LIST *leaves, Item *conds,
55                                  Key_use_array *keyuse,
56                                  bool first_optimization);
57 static bool optimize_semijoin_nests_for_materialization(JOIN *join);
58 static void calculate_materialization_costs(JOIN *join, TABLE_LIST *sj_nest,
59                                             uint n_tables,
60                                             Semijoin_mat_optimize *sjm);
61 static void make_outerjoin_info(JOIN *join);
62 static bool make_join_select(JOIN *join, Item *item);
63 static bool list_contains_unique_index(JOIN_TAB *tab,
64                           bool (*find_func) (Field *, void *), void *data);
65 static bool find_field_in_item_list (Field *field, void *data);
66 static bool find_field_in_order_list (Field *field, void *data);
67 static ORDER *create_distinct_group(THD *thd, Ref_ptr_array ref_pointer_array,
68                                     ORDER *order, List<Item> &fields,
69                                     List<Item> &all_fields,
70 				    bool *all_order_by_fields_used);
71 static TABLE *get_sort_by_table(ORDER *a,ORDER *b,TABLE_LIST *tables);
72 static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab);
73 static Item *remove_additional_cond(Item* conds);
/* The trailing 'changelog' argument is optional and defaults to NULL. */
74 static bool simplify_joins(JOIN *join, List<TABLE_LIST> *join_list,
75                            Item *conds, bool top, bool in_sj,
76                            Item **new_conds,
77                            uint *changelog= NULL);
78 static bool record_join_nest_info(st_select_lex *select,
79                                   List<TABLE_LIST> *tables);
80 static uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list,
81                                           uint first_unused);
82 static ORDER *remove_const(JOIN *join,ORDER *first_order, Item *cond,
83                            bool change_list, bool *simple_order,
84                            const char *clause_type);
85 static void save_index_subquery_explain_info(JOIN_TAB *join_tab, Item* where);
86 static void trace_table_dependencies(Opt_trace_context * trace,
87                                      JOIN_TAB *join_tabs,
88                                      uint table_count);
89 static bool
90 update_ref_and_keys(THD *thd, Key_use_array *keyuse,JOIN_TAB *join_tab,
91                     uint tables, Item *cond, COND_EQUAL *cond_equal,
92                     table_map normal_tables, SELECT_LEX *select_lex,
93                     SARGABLE_PARAM **sargables);
94 static bool pull_out_semijoin_tables(JOIN *join);
95 static void set_position(JOIN *join, uint idx, JOIN_TAB *table, Key_use *key);
96 static void add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab);
97 static ha_rows get_quick_record_count(THD *thd, SQL_SELECT *select,
98 				      TABLE *table,
99 				      const key_map *keys,ha_rows limit);
100 static void optimize_keyuse(JOIN *join, Key_use_array *keyuse_array);
101 static Item *
102 make_cond_for_table_from_pred(Item *root_cond, Item *cond,
103                               table_map tables, table_map used_table,
104                               bool exclude_expensive_cond);
105 static bool
106 only_eq_ref_tables(JOIN *join, ORDER *order, table_map tables,
107                    table_map *cached_eq_ref_tables, table_map
108                    *eq_ref_tables);
109 
110 static bool can_switch_from_ref_to_range(THD *thd, JOIN_TAB *tab);
111 
112 /**
113   global select optimisation.
114 
115   @note
116     error code saved in field 'error'
117 
118   @retval
119     0   success
120   @retval
121     1   error
122 */
123 
124 int
optimize()125 JOIN::optimize()
126 {
127   ulonglong select_opts_for_readinfo;
128   uint no_jbuf_after= UINT_MAX;
129 
130   DBUG_ENTER("JOIN::optimize");
131   DBUG_ASSERT(!tables || thd->lex->is_query_tables_locked());
132 
133   // to prevent double initialization on EXPLAIN
134   if (optimized)
135     DBUG_RETURN(0);
136 
137   // We may do transformations (like semi-join):
138   Prepare_error_tracker tracker(thd);
139 
140   optimized= true;
141   const bool first_optimization= select_lex->first_cond_optimization;
142   select_lex->first_cond_optimization= false;
143 
144   DEBUG_SYNC(thd, "before_join_optimize");
145 
146   THD_STAGE_INFO(thd, stage_optimizing);
147 
148   Opt_trace_context * const trace= &thd->opt_trace;
149   Opt_trace_object trace_wrapper(trace);
150   Opt_trace_object trace_optimize(trace, "join_optimization");
151   trace_optimize.add_select_number(select_lex->select_number);
152   Opt_trace_array trace_steps(trace, "steps");
153 
154   // Needed in case optimizer short-cuts, set properly in make_tmp_tables_info()
155   fields= &select_lex->item_list;
156 
157   /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
158   if (flatten_subqueries())
159     DBUG_RETURN(1); /* purecov: inspected */
160 
161   /*
162     Run optimize phase for all derived tables/views used in this SELECT,
163     including those in semi-joins.
164   */
165   if (select_lex->handle_derived(thd->lex, &mysql_derived_optimize))
166     DBUG_RETURN(1);
167 
168   /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
169 
170   row_limit= ((select_distinct || order || group_list) ? HA_POS_ERROR :
171 	      unit->select_limit_cnt);
172   // m_select_limit is used to decide if we are likely to scan the whole table.
173   m_select_limit= unit->select_limit_cnt;
174   if (having || (select_options & OPTION_FOUND_ROWS))
175     m_select_limit= HA_POS_ERROR;
176   do_send_rows = (unit->select_limit_cnt > 0) ? 1 : 0;
177 
178 #ifdef HAVE_REF_TO_FIELDS			// Not done yet
179   /* Add HAVING to WHERE if possible */
180   if (having && !group_list && !sum_func_count)
181   {
182     if (!conds)
183     {
184       conds= having;
185       having= 0;
186     }
187     else if ((conds=new Item_cond_and(conds,having)))
188     {
189       /*
190         Item_cond_and can't be fixed after creation, so we do not check
191         conds->fixed
192       */
193       conds->fix_fields(thd, &conds);
194       conds->change_ref_to_fields(thd, tables_list);
195       conds->top_level_item();
196       having= 0;
197     }
198   }
199 #endif
200   if (first_optimization)
201   {
202     /*
203       These are permanent transformations, so new items must be
204       allocated in the statement mem root
205     */
206     Prepared_stmt_arena_holder ps_arena_holder(thd);
207 
208     /* Convert all outer joins to inner joins if possible */
209     if (simplify_joins(this, join_list, conds, true, false, &conds))
210     {
211       DBUG_PRINT("error",("Error from simplify_joins"));
212       DBUG_RETURN(1);
213     }
214     if (record_join_nest_info(select_lex, join_list))
215     {
216       DBUG_PRINT("error",("Error from record_join_nest_info"));
217       DBUG_RETURN(1);
218     }
219     build_bitmap_for_nested_joins(join_list, 0);
220 
221     /*
222       After permanent transformations above, prep_where created in
223       st_select_lex::fix_prepare_information() is out-of-date, we need to
224       refresh it.
225       For that we must copy "conds" because it contains AND/OR items in a
226       non-permanent memroot. And this copy must contain real items only,
227       because the new AND/OR items will not have their argument pointers
228       restored by rollback_item_tree_changes().
229       @see st_select_lex::fix_prepare_information() for problems with this.
230       @todo in WL#7082 move transformations above to before
231       st_select_lex::fix_prepare_information(), and remove this second copy
232       below.
233     */
234       select_lex->prep_where=
235         conds ? conds->copy_andor_structure(thd, true): NULL;
236       if (conds)
237         thd->change_item_tree_place(&conds, &select_lex->prep_where);
238   }
239 
240   /*
241     Note: optimize_cond() makes changes to conds. Since
242     select_lex->where and conds points to the same condition, this
243     function call effectively changes select_lex->where as well.
244   */
245   conds= optimize_cond(thd, conds, &cond_equal,
246                        join_list, true, &select_lex->cond_value);
247   if (thd->is_error())
248   {
249     error= 1;
250     DBUG_PRINT("error",("Error from optimize_cond"));
251     DBUG_RETURN(1);
252   }
253 
254   {
255     // Note above about optimize_cond() also applies to select_lex->having
256     having= optimize_cond(thd, having, &cond_equal, join_list, false,
257                           &select_lex->having_value);
258     if (thd->is_error())
259     {
260       error= 1;
261       DBUG_PRINT("error",("Error from optimize_cond"));
262       DBUG_RETURN(1);
263     }
264     if (select_lex->cond_value == Item::COND_FALSE ||
265         select_lex->having_value == Item::COND_FALSE ||
266         (!unit->select_limit_cnt && !(select_options & OPTION_FOUND_ROWS)))
267     {						/* Impossible cond */
268       zero_result_cause=  select_lex->having_value == Item::COND_FALSE ?
269                            "Impossible HAVING" : "Impossible WHERE";
270       tables= 0;
271       primary_tables= 0;
272       best_rowcount= 0;
273       goto setup_subq_exit;
274     }
275   }
276 
277 #ifdef WITH_PARTITION_STORAGE_ENGINE
278   if (select_lex->partitioned_table_count && prune_table_partitions(thd))
279   {
280     error= 1;
281     DBUG_PRINT("error", ("Error from prune_partitions"));
282     DBUG_RETURN(1);
283   }
284 #endif
285 
286   optimize_fts_limit_query();
287 
288   /*
289      Try to optimize count(*), min() and max() to const fields if
290      there is implicit grouping (aggregate functions but no
291      group_list). In this case, the result set shall only contain one
292      row.
293   */
294   if (tables_list && implicit_grouping)
295   {
296     int res;
297     /*
298       opt_sum_query() returns HA_ERR_KEY_NOT_FOUND if no rows match
299       to the WHERE conditions,
300       or 1 if all items were resolved (optimized away),
301       or 0, or an error number HA_ERR_...
302 
303       If all items were resolved by opt_sum_query, there is no need to
304       open any tables.
305     */
306     if ((res=opt_sum_query(thd, select_lex->leaf_tables, all_fields, conds)))
307     {
308       best_rowcount= 0;
309       if (res == HA_ERR_KEY_NOT_FOUND)
310       {
311         DBUG_PRINT("info",("No matching min/max row"));
312 	zero_result_cause= "No matching min/max row";
313         tables= 0;
314         primary_tables= 0;
315         goto setup_subq_exit;
316       }
317       if (res > 1)
318       {
319         error= res;
320         DBUG_PRINT("error",("Error from opt_sum_query"));
321         DBUG_RETURN(1);
322       }
323       if (res < 0)
324       {
325         DBUG_PRINT("info",("No matching min/max row"));
326         zero_result_cause= "No matching min/max row";
327         tables= 0;
328         primary_tables= 0;
329         goto setup_subq_exit;
330       }
331       DBUG_PRINT("info",("Select tables optimized away"));
332       zero_result_cause= "Select tables optimized away";
333       tables_list= 0;				// All tables resolved
334       best_rowcount= 1;
335       const_tables= primary_tables;
336       /*
337         Extract all table-independent conditions and replace the WHERE
338         clause with them. All other conditions were computed by opt_sum_query
339         and the MIN/MAX/COUNT function(s) have been replaced by constants,
340         so there is no need to compute the whole WHERE clause again.
341         Notice that make_cond_for_table() will always succeed in removing all
342         computed conditions, because opt_sum_query() is applicable only to
343         conjunctions.
344         Preserve conditions for EXPLAIN.
345       */
346       if (conds && !(thd->lex->describe & DESCRIBE_EXTENDED))
347       {
348         Item *table_independent_conds=
349           make_cond_for_table(conds, PSEUDO_TABLE_BITS, 0, 0);
350         DBUG_EXECUTE("where",
351                      print_where(table_independent_conds,
352                                  "where after opt_sum_query()",
353                                  QT_ORDINARY););
354         conds= table_independent_conds;
355       }
356       goto setup_subq_exit;
357     }
358   }
359   if (!tables_list)
360   {
361     DBUG_PRINT("info",("No tables"));
362     best_rowcount= 1;
363     error= 0;
364     if (make_tmp_tables_info())
365       DBUG_RETURN(1);
366     DBUG_RETURN(0);
367   }
368   error= -1;					// Error is sent to client
369   sort_by_table= get_sort_by_table(order, group_list, select_lex->leaf_tables);
370 
371   /* Calculate how to do the join */
372   THD_STAGE_INFO(thd, stage_statistics);
373   if (make_join_statistics(this, select_lex->leaf_tables, conds, &keyuse,
374       first_optimization))
375   {
376     DBUG_PRINT("error",("Error: make_join_statistics() failed"));
377     DBUG_RETURN(1);
378   }
379 
380   if (rollup.state != ROLLUP::STATE_NONE)
381   {
382     if (rollup_process_const_fields())
383     {
384       DBUG_PRINT("error", ("Error: rollup_process_fields() failed"));
385       DBUG_RETURN(1);
386     }
387   }
388   else
389   {
390     /* Remove distinct if only const tables */
391     select_distinct&= !plan_is_const();
392   }
393 
394   if (const_table_map != found_const_table_map &&
395       !(select_options & SELECT_DESCRIBE))
396   {
397     // There is at least one empty const table
398     zero_result_cause= "no matching row in const table";
399     DBUG_PRINT("error",("Error: %s", zero_result_cause));
400     goto setup_subq_exit;
401   }
402   if (!(thd->variables.option_bits & OPTION_BIG_SELECTS) &&
403       best_read > (double) thd->variables.max_join_size &&
404       !(select_options & SELECT_DESCRIBE))
405   {						/* purecov: inspected */
406     my_message(ER_TOO_BIG_SELECT, ER(ER_TOO_BIG_SELECT), MYF(0));
407     error= -1;
408     DBUG_RETURN(1);
409   }
410   if (const_tables && !thd->locked_tables_mode &&
411       !(select_options & SELECT_NO_UNLOCK))
412   {
413     TABLE *ct[MAX_TABLES];
414     for (uint i= 0; i < const_tables; i++)
415       ct[i]= join_tab[i].table;
416     mysql_unlock_some_tables(thd, ct, const_tables);
417   }
418   if (!conds && outer_join)
419   {
420     /* Handle the case where we have an OUTER JOIN without a WHERE */
421     conds=new Item_int((longlong) 1,1);	// Always true
422   }
423 
424   error= 0;
425   if (outer_join)
426   {
427     reset_nj_counters(join_list);
428     make_outerjoin_info(this);
429   }
430   // Assign map of "available" tables to all tables belonging to query block
431   if (!plan_is_const())
432     set_prefix_tables();
433 
434   /*
435     Among the equal fields belonging to the same multiple equality
436     choose the one that is to be retrieved first and substitute
437     all references to these in where condition for a reference for
438     the selected field.
439   */
440   if (conds)
441   {
442     conds= substitute_for_best_equal_field(conds, cond_equal, map2table);
443     if (thd->is_error())
444     {
445       error= 1;
446       DBUG_PRINT("error",("Error from substitute_for_best_equal"));
447       DBUG_RETURN(1);
448     }
449     conds->update_used_tables();
450     DBUG_EXECUTE("where",
451                  print_where(conds,
452                              "after substitute_best_equal",
453                              QT_ORDINARY););
454   }
455 
456   /*
457     Perform the same optimization on field evaluation for all join conditions.
458   */
459   for (JOIN_TAB *tab= join_tab + const_tables; tab < join_tab + tables ; tab++)
460   {
461     if (tab->on_expr_ref && *tab->on_expr_ref)
462     {
463       *tab->on_expr_ref= substitute_for_best_equal_field(*tab->on_expr_ref,
464                                                          tab->cond_equal,
465                                                          map2table);
466       if (thd->is_error())
467       {
468         error= 1;
469         DBUG_PRINT("error",("Error from substitute_for_best_equal"));
470         DBUG_RETURN(1);
471       }
472       (*tab->on_expr_ref)->update_used_tables();
473     }
474   }
475 
476   if (conds && const_table_map != found_const_table_map &&
477       (select_options & SELECT_DESCRIBE))
478   {
479     conds=new Item_int((longlong) 0,1);	// Always false
480   }
481 
482   if (select_lex->materialized_table_count)
483     drop_unused_derived_keys();
484 
485   if (set_access_methods())
486   {
487     error= 1;
488     DBUG_PRINT("error",("Error from set_access_methods"));
489     DBUG_RETURN(1);
490   }
491 
492   // Update table dependencies after assigning ref access fields
493   update_depend_map(this);
494 
495   THD_STAGE_INFO(thd, stage_preparing);
496   if (result->initialize_tables(this))
497   {
498     DBUG_PRINT("error",("Error: initialize_tables() failed"));
499     DBUG_RETURN(1);				// error == -1
500   }
501 
502   if (make_join_select(this, conds))
503   {
504     zero_result_cause=
505       "Impossible WHERE noticed after reading const tables";
506     goto setup_subq_exit;
507   }
508 
509   error= -1;					/* if goto err */
510 
511   /* Optimize distinct away if possible */
512   {
513     ORDER *org_order= order;
514     order= ORDER_with_src(remove_const(this, order, conds, 1, &simple_order, "ORDER BY"), order.src);;
515     if (thd->is_error())
516     {
517       error= 1;
518       DBUG_PRINT("error",("Error from remove_const"));
519       DBUG_RETURN(1);
520     }
521 
522     /*
523       If we are using ORDER BY NULL or ORDER BY const_expression,
524       return result in any order (even if we are using a GROUP BY)
525     */
526     if (!order && org_order)
527       skip_sort_order= 1;
528   }
529   /*
530      Check if we can optimize away GROUP BY/DISTINCT.
531      We can do that if there are no aggregate functions, the
532      fields in DISTINCT clause (if present) and/or columns in GROUP BY
533      (if present) contain direct references to all key parts of
534      a unique index (in whatever order) and if the key parts of the
535      unique index cannot contain NULLs.
536      Note that the unique keys for DISTINCT and GROUP BY need not
537      be the same (as long as they are unique).
538 
539      The FROM clause must contain a single non-constant table.
540   */
541   if (plan_is_single_table() &&
542       (group_list || select_distinct) &&
543       !tmp_table_param.sum_func_count &&
544       (!join_tab[const_tables].select ||
545        !join_tab[const_tables].select->quick ||
546        join_tab[const_tables].select->quick->get_type() !=
547        QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX))
548   {
549     if (group_list && rollup.state == ROLLUP::STATE_NONE &&
550        list_contains_unique_index(&join_tab[const_tables],
551                                  find_field_in_order_list,
552                                  (void *) group_list))
553     {
554       /*
555         We have found that grouping can be removed since groups correspond to
556         only one row anyway, but we still have to guarantee correct result
557         order. The line below effectively rewrites the query from GROUP BY
558         <fields> to ORDER BY <fields>. There are three exceptions:
559         - if skip_sort_order is set (see above), then we can simply skip
560           GROUP BY;
561         - if we are in a subquery, we don't have to maintain order
562         - we can only rewrite ORDER BY if the ORDER BY fields are 'compatible'
563           with the GROUP BY ones, i.e. either one is a prefix of another.
564           We only check if the ORDER BY is a prefix of GROUP BY. In this case
565           test_if_subpart() copies the ASC/DESC attributes from the original
566           ORDER BY fields.
567           If GROUP BY is a prefix of ORDER BY, then it is safe to leave
568           'order' as is.
569        */
570       if (!order || test_if_subpart(group_list, order))
571       {
572         if (skip_sort_order ||
573             select_lex->master_unit()->item) // This is a subquery
574           order= NULL;
575         else
576           order= group_list;
577       }
578       /*
579         If we have an IGNORE INDEX FOR GROUP BY(fields) clause, this must be
580         rewritten to IGNORE INDEX FOR ORDER BY(fields).
581       */
582       join_tab->table->keys_in_use_for_order_by=
583         join_tab->table->keys_in_use_for_group_by;
584       group_list= 0;
585       group= 0;
586     }
587     if (select_distinct &&
588        list_contains_unique_index(&join_tab[const_tables],
589                                  find_field_in_item_list,
590                                  (void *) &fields_list))
591     {
592       select_distinct= 0;
593     }
594   }
595   if (group_list || tmp_table_param.sum_func_count)
596   {
597     if (hidden_group_field_count == 0 && rollup.state == ROLLUP::STATE_NONE)
598     {
599       /*
600         All GROUP expressions are in SELECT list, so resulting rows are
601         distinct. ROLLUP is not specified, so adds no row. So all rows in the
602         result set are distinct, DISTINCT is useless.
603         @todo could remove DISTINCT if ROLLUP were specified and all GROUP
604         expressions were non-nullable, because ROLLUP adds only NULL
605         values. Currently, ROLLUP+DISTINCT is rejected because executor
606         cannot handle it in all cases.
607       */
608       select_distinct= false;
609     }
610   }
611   else if (select_distinct &&
612            plan_is_single_table() &&
613            rollup.state == ROLLUP::STATE_NONE)
614   {
615     /*
616       We are only using one table. In this case we change DISTINCT to a
617       GROUP BY query if:
618       - The GROUP BY can be done through indexes (no sort) and the ORDER
619         BY only uses selected fields.
620 	(In this case we can later optimize away GROUP BY and ORDER BY)
621       - We are scanning the whole table without LIMIT
622         This can happen if:
623         - We are using CALC_FOUND_ROWS
624         - We are using an ORDER BY that can't be optimized away.
625 
626       We don't want to use this optimization when we are using LIMIT
627       because in this case we can just create a temporary table that
628       holds LIMIT rows and stop when this table is full.
629     */
630     JOIN_TAB *tab= &join_tab[const_tables];
631     bool all_order_fields_used;
632     if (order)
633     {
634       skip_sort_order=
635         test_if_skip_sort_order(tab, order, m_select_limit,
636                                 true,           // no_changes
637                                 &tab->table->keys_in_use_for_order_by,
638                                 "ORDER BY");
639     }
640     ORDER *o;
641     if ((o= create_distinct_group(thd, ref_ptrs,
642                                   order, fields_list, all_fields,
643 				  &all_order_fields_used)))
644     {
645       group_list= ORDER_with_src(o, ESC_DISTINCT);
646       const bool skip_group=
647         skip_sort_order &&
648         test_if_skip_sort_order(tab, group_list, m_select_limit,
649                                 true,         // no_changes
650                                 &tab->table->keys_in_use_for_group_by,
651                                 "GROUP BY");
652       count_field_types(select_lex, &tmp_table_param, all_fields, 0);
653       if ((skip_group && all_order_fields_used) ||
654 	  m_select_limit == HA_POS_ERROR ||
655 	  (order && !skip_sort_order))
656       {
657 	/*  Change DISTINCT to GROUP BY */
658 	select_distinct= 0;
659 	no_order= !order;
660 	if (all_order_fields_used)
661 	{
662 	  if (order && skip_sort_order)
663 	  {
664 	    /*
665 	      Force MySQL to read the table in sorted order to get result in
666 	      ORDER BY order.
667 	    */
668 	    tmp_table_param.quick_group=0;
669 	  }
670 	  order=0;
671         }
672 	group=1;				// For end_write_group
673       }
674       else
675 	group_list= 0;
676     }
677     else if (thd->is_fatal_error)			// End of memory
678       DBUG_RETURN(1);
679   }
680   simple_group= 0;
681   {
682     ORDER *old_group_list= group_list;
683     group_list= ORDER_with_src(remove_const(this, group_list, conds,
684                                             rollup.state == ROLLUP::STATE_NONE,
685                                             &simple_group, "GROUP BY"),
686                                group_list.src);
687 
688     if (thd->is_error())
689     {
690       error= 1;
691       DBUG_PRINT("error",("Error from remove_const"));
692       DBUG_RETURN(1);
693     }
694     if (old_group_list && !group_list)
695       select_distinct= 0;
696   }
697   if (!group_list && group)
698   {
699     order=0;					// The output has only one row
700     simple_order=1;
701     select_distinct= 0;                       // No need in distinct for 1 row
702     group_optimized_away= 1;
703   }
704 
705   calc_group_buffer(this, group_list);
706   send_group_parts= tmp_table_param.group_parts; /* Save org parts */
707 
708   if (test_if_subpart(group_list, order) ||
709       (!group_list && tmp_table_param.sum_func_count))
710   {
711     order=0;
712     if (is_indexed_agg_distinct(this, NULL))
713       sort_and_group= 0;
714   }
715 
716   /*
717     If the hint FORCE INDEX FOR ORDER BY/GROUP BY is used for the first
718     table (it does not make sense for other tables) then we cannot do join
719     buffering.
720   */
721   if (!plan_is_const())
722   {
723     const TABLE * const first= join_tab[const_tables].table;
724     if ((first->force_index_order && order) ||
725         (first->force_index_group && group_list))
726       no_jbuf_after= 0;
727   }
728 
729   select_opts_for_readinfo=
730     (select_options & (SELECT_DESCRIBE | SELECT_NO_JOIN_CACHE)) |
731     (select_lex->ftfunc_list->elements ?  SELECT_NO_JOIN_CACHE : 0);
732 
733   if (make_join_readinfo(this, select_opts_for_readinfo, no_jbuf_after))
734     DBUG_RETURN(1);
735 
736   /*
737     Check if we need to create a temporary table.
738     This has to be done if all tables are not already read (const tables)
739     and one of the following conditions holds:
740     - We are using DISTINCT (simple distinct's are already optimized away)
741     - We are using an ORDER BY or GROUP BY on fields not in the first table
742     - We are using different ORDER BY and GROUP BY orders
743     - The user wants us to buffer the result.
744     When the WITH ROLLUP modifier is present, we cannot skip temporary table
745     creation for the DISTINCT clause just because there are only const tables.
746   */
747   need_tmp= ((!plan_is_const() &&
748 	     ((select_distinct || !simple_order || !simple_group) ||
749 	      (group_list && order) ||
750 	      MY_TEST(select_options & OPTION_BUFFER_RESULT))) ||
751              (rollup.state != ROLLUP::STATE_NONE && select_distinct));
752 
753   /* Perform FULLTEXT search before all regular searches */
754   if (!(select_options & SELECT_DESCRIBE) &&
755       !select_lex->materialized_table_count &&
756       select_lex->has_ft_funcs())
757   {
758     if (init_ftfuncs(thd, select_lex, order))
759       DBUG_RETURN(1);
760     optimize_fts_query();
761   }
762 
763   /*
764     By setting child_subquery_can_materialize so late we gain the following:
765     JOIN::compare_costs_of_subquery_strategies() can test this variable to
766     know if we have finished evaluating constant conditions, which itself
767     helps determining fanouts.
768   */
769   child_subquery_can_materialize= true;
770 
771   /*
772     It's necessary to check const part of HAVING cond as
773     there is a chance that some cond parts may become
774     const items after make_join_statistics() (for example
775     when Item is a reference to a const table field from
776     outer join).
777     This check is performed only for those conditions
778     which do not use aggregate functions. In such case
779     temporary table may not be used and const condition
780     elements may be lost during further having
781     condition transformation in JOIN::exec.
782   */
783   if (having && const_table_map && !having->with_sum_func)
784   {
785     having->update_used_tables();
786     having= remove_eq_conds(thd, having, &select_lex->having_value);
787     if (select_lex->having_value == Item::COND_FALSE)
788     {
789       having= having_for_explain= new Item_int((longlong) 0,1);
790       zero_result_cause= "Impossible HAVING noticed after reading const tables";
791       error= 0;
792       DBUG_RETURN(0);
793     }
794   }
795 
796   /* Cache constant expressions in WHERE, HAVING, ON clauses. */
797   if (!plan_is_const() && cache_const_exprs())
798     DBUG_RETURN(1);
799 
800   // See if this subquery can be evaluated with subselect_indexsubquery_engine
801   if (!group_list && !order &&
802       unit->item && unit->item->substype() == Item_subselect::IN_SUBS &&
803       primary_tables == 1 && conds &&
804       !unit->is_union())
805   {
806     bool changed= FALSE;
807     subselect_engine *engine= 0;
808     Item_in_subselect * const in_subs=
809       static_cast<Item_in_subselect *>(unit->item);
810     if (in_subs->exec_method == Item_exists_subselect::EXEC_MATERIALIZATION)
811     {
812       // We cannot have two engines at the same time
813     }
814     else if (!having)
815     {
816       Item *where= conds;
817       if (join_tab[0].type == JT_EQ_REF &&
818 	  join_tab[0].ref.items[0]->item_name.ptr() == in_left_expr_name)
819       {
820         remove_subq_pushed_predicates(&where);
821         save_index_subquery_explain_info(join_tab, where);
822         join_tab[0].type= JT_UNIQUE_SUBQUERY;
823         error= 0;
824         changed= TRUE;
825         engine= new subselect_indexsubquery_engine(thd, join_tab, unit->item,
826                                                    where, NULL /* having */,
827                                                    false /* check_null */,
828                                                    true /* unique */);
829       }
830       else if (join_tab[0].type == JT_REF &&
831 	       join_tab[0].ref.items[0]->item_name.ptr() == in_left_expr_name)
832       {
833 	remove_subq_pushed_predicates(&where);
834         save_index_subquery_explain_info(join_tab, where);
835         join_tab[0].type= JT_INDEX_SUBQUERY;
836         error= 0;
837         changed= TRUE;
838         engine= new subselect_indexsubquery_engine(thd, join_tab, unit->item,
839                                                    where, NULL, false, false);
840       }
841     } else if (join_tab[0].type == JT_REF_OR_NULL &&
842 	       join_tab[0].ref.items[0]->item_name.ptr() == in_left_expr_name &&
843                having->item_name.ptr() == in_having_cond)
844     {
845       join_tab[0].type= JT_INDEX_SUBQUERY;
846       error= 0;
847       changed= TRUE;
848       conds= remove_additional_cond(conds);
849       save_index_subquery_explain_info(join_tab, conds);
850       engine= new subselect_indexsubquery_engine(thd, join_tab, unit->item,
851                                                  conds, having, true, false);
852       /**
853          @todo Above we passed unique=false. But for this query:
854           (oe1, oe2) IN (SELECT primary_key, non_key_maybe_null_field FROM tbl)
855          we could use "unique=true" for the first index component and let
856          Item_is_not_null_test(non_key_maybe_null_field) handle the second.
857       */
858     }
859     if (changed)
860     {
861       /*
862         We leave optimize() because the rest of it is only about order/group
863         which those subqueries don't have.
864         @todo: let execution flow down instead, to be future-proof.
865       */
866       DBUG_RETURN(unit->item->change_engine(engine));
867     }
868   }
869   /*
870     Need to tell handlers that to play it safe, it should fetch all
871     columns of the primary key of the tables: this is because MySQL may
872     build row pointers for the rows, and for all columns of the primary key
873     the read set has not necessarily been set by the server code.
874   */
875   if (need_tmp || select_distinct || group_list || order)
876   {
877     for (uint i = const_tables; i < primary_tables; i++)
878       join_tab[i].table->prepare_for_position();
879   }
880   DBUG_EXECUTE("info", TEST_join(this););
881 
882   if (!plan_is_const())
883   {
884     JOIN_TAB *tab= &join_tab[const_tables];
885 
886     if (order)
887     {
888       /*
889         Force using of tmp table if sorting by a SP or UDF function due to
890         their expensive and probably non-deterministic nature.
891       */
892       for (ORDER *tmp_order= order; tmp_order ; tmp_order=tmp_order->next)
893       {
894         Item *item= *tmp_order->item;
895         if (item->is_expensive())
896         {
897           /* Force tmp table without sort */
898           need_tmp=1; simple_order=simple_group=0;
899           break;
900         }
901       }
902     }
903 
904     /*
905       Because filesort always does a full table scan or a quick range scan
906       we must add the removed reference to the select for the table.
907       We only need to do this when we have a simple_order or simple_group
908       as in other cases the join is done before the sort.
909     */
910     if ((order || group_list) &&
911         tab->type != JT_ALL &&
912         tab->type != JT_FT &&
913         tab->type != JT_REF_OR_NULL &&
914         ((order && simple_order) || (group_list && simple_group)))
915     {
916       if (add_ref_to_table_cond(thd,tab)) {
917         DBUG_RETURN(1);
918       }
919     }
920 
921     /*
922       Investigate whether we may use an ordered index as part of either
923       DISTINCT, GROUP BY or ORDER BY execution. An ordered index may be
924       used for only the first of any of these terms to be executed. This
925       is reflected in the order which we check for test_if_skip_sort_order()
926       below. However we do not check for DISTINCT here, as it would have
927       been transformed to a GROUP BY at this stage if it is a candidate for
928       ordered index optimization.
929       If a decision was made to use an ordered index, the availability
930       of such an access path is stored in 'ordered_index_usage' for later
931       use by 'execute' or 'explain'
932     */
933     DBUG_ASSERT(ordered_index_usage == ordered_index_void);
934 
935     if (group_list)   // GROUP BY honoured first
936                       // (DISTINCT was rewritten to GROUP BY if skippable)
937     {
938       /*
939         When there is SQL_BIG_RESULT do not sort using index for GROUP BY,
940         and thus force sorting on disk unless a group min-max optimization
941         is going to be used as it is applied now only for one table queries
942         with covering indexes.
943       */
944       if (!(select_options & SELECT_BIG_RESULT) ||
945             (tab->select &&
946              tab->select->quick &&
947              tab->select->quick->get_type() ==
948              QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX))
949       {
950         if (simple_group &&              // GROUP BY is possibly skippable
951             !select_distinct)            // .. if not preceded by a DISTINCT
952         {
953           /*
954             Calculate a possible 'limit' of table rows for 'GROUP BY':
955             A specified 'LIMIT' is relative to the final resultset.
956             'need_tmp' implies that there will be more postprocessing
957             so the specified 'limit' should not be enforced yet.
958            */
959           const ha_rows limit = need_tmp ? HA_POS_ERROR : m_select_limit;
960 
961           if (test_if_skip_sort_order(tab, group_list, limit, false,
962                                       &tab->table->keys_in_use_for_group_by,
963                                       "GROUP BY"))
964           {
965             ordered_index_usage= ordered_index_group_by;
966           }
967         }
968 
969 	/*
970 	  If we are going to use semi-join LooseScan, it will depend
971 	  on the selected index scan to be used.  If index is not used
972 	  for the GROUP BY, we risk that sorting is put on the LooseScan
973 	  table.  In order to avoid this, force use of temporary table.
974 	  TODO: Explain the quick_group part of the test below.
975 	 */
976         if ((ordered_index_usage != ordered_index_group_by) &&
977             (tmp_table_param.quick_group ||
978 	     (tab->emb_sj_nest &&
979 	      tab->position->sj_strategy == SJ_OPT_LOOSE_SCAN)))
980         {
981           need_tmp=1;
982           simple_order= simple_group= false; // Force tmp table without sort
983         }
984       }
985     }
986     else if (order &&                      // ORDER BY wo/ preceeding GROUP BY
987              (simple_order || skip_sort_order)) // which is possibly skippable
988     {
989       if (test_if_skip_sort_order(tab, order, m_select_limit, false,
990                                   &tab->table->keys_in_use_for_order_by,
991                                   "ORDER BY"))
992       {
993         ordered_index_usage= ordered_index_order_by;
994       }
995     }
996   }
997 
998   /**
999    * Push joins to handler(s) whenever possible.
1000    * The handlers will inspect the QEP through the
1001    * AQP (Abstract Query Plan), and extract from it
1002    * whatever it might implement of pushed execution.
1003    * It is the responsibility of the handler to store any
1004    * information it needs for later execution of pushed queries.
1005    *
1006    * Currently pushed joins are only implemented by NDB.
1007    * It only makes sense to try pushing if > 1 non-const tables.
1008    */
1009   if (!plan_is_const() && !plan_is_single_table())
1010   {
1011     const AQP::Join_plan plan(this);
1012     if (ha_make_pushed_joins(thd, &plan))
1013       DBUG_RETURN(1);
1014   }
1015 
1016   /**
1017    * Set up access functions for the tables as
1018    * required by the selected access type.
1019    */
1020   for (uint i= const_tables; i < tables; i++)
1021   {
1022     pick_table_access_method (&join_tab[i]);
1023   }
1024 
1025   if (make_tmp_tables_info())
1026     DBUG_RETURN(1);
1027 
1028   error= 0;
1029   DBUG_RETURN(0);
1030 
1031 setup_subq_exit:
1032 
1033   DBUG_ASSERT(zero_result_cause != NULL);
1034   /*
1035     Even with zero matching rows, subqueries in the HAVING clause may
1036     need to be evaluated if there are aggregate functions in the
1037     query. If this JOIN is part of an outer query, subqueries in HAVING may
1038     be evaluated several times in total; so subquery materialization makes
1039     sense.
1040   */
1041   child_subquery_can_materialize= true;
1042   trace_steps.end();   // because all steps are done
1043   Opt_trace_object(trace, "empty_result")
1044     .add_alnum("cause", zero_result_cause);
1045 
1046   having_for_explain= having;
1047   error= 0;
1048   DBUG_RETURN(0);
1049 }
1050 
1051 
1052 #ifdef WITH_PARTITION_STORAGE_ENGINE
1053 
1054 /**
1055   Prune partitions for all tables of a join (query block).
1056 
1057   Requires that tables have been locked.
1058 
1059   @param thd Thread pointer
1060 
1061   @returns false if success, true if error
1062 */
prune_table_partitions(THD * thd)1063 bool JOIN::prune_table_partitions(THD *thd)
1064 {
1065   DBUG_ASSERT(select_lex->partitioned_table_count);
1066 
1067   for (TABLE_LIST *tbl= select_lex->leaf_tables; tbl; tbl= tbl->next_leaf)
1068   {
1069     /*
1070       If tbl->embedding!=NULL that means that this table is in the inner
1071       part of the nested outer join, and we can't do partition pruning
1072       (TODO: check if this limitation can be lifted.
1073              This also excludes semi-joins.  Is that intentional?)
1074       This will try to prune non-static conditions, which can
1075       be used after the tables are locked.
1076     */
1077     if (!tbl->embedding)
1078     {
1079       if (prune_partitions(thd, tbl->table,
1080                            tbl->join_cond() ? tbl->join_cond() : conds))
1081         return true;
1082     }
1083   }
1084 
1085   return false;
1086 }
1087 
1088 #endif
1089 
1090 
1091 /**
1092   Set NESTED_JOIN::counter=0 in all nested joins in passed list.
1093 
1094     Recursively set NESTED_JOIN::counter=0 for all nested joins contained in
1095     the passed join_list.
1096 
1097   @param join_list  List of nested joins to process. It may also contain base
1098                     tables which will be ignored.
1099 */
1100 
reset_nj_counters(List<TABLE_LIST> * join_list)1101 void reset_nj_counters(List<TABLE_LIST> *join_list)
1102 {
1103   List_iterator<TABLE_LIST> li(*join_list);
1104   TABLE_LIST *table;
1105   DBUG_ENTER("reset_nj_counters");
1106   while ((table= li++))
1107   {
1108     NESTED_JOIN *nested_join;
1109     if ((nested_join= table->nested_join))
1110     {
1111       nested_join->nj_counter= 0;
1112       reset_nj_counters(&nested_join->join_list);
1113     }
1114   }
1115   DBUG_VOID_RETURN;
1116 }
1117 
1118 
1119 /*****************************************************************************
1120   Make some simple condition optimization:
1121   If there is a test 'field = const' change all refs to 'field' to 'const'
1122   Remove all dummy tests 'item = item', 'const op const'.
1123   Remove all 'item is NULL', when item can never be null!
1124   item->marker should be 0 for all items on entry
1125   Return in cond_value FALSE if condition is impossible (1 = 2)
1126 *****************************************************************************/
1127 
1128 class COND_CMP :public ilink<COND_CMP> {
1129 public:
operator new(size_t size)1130   static void *operator new(size_t size)
1131   {
1132     return (void*) sql_alloc((uint) size);
1133   }
operator delete(void * ptr MY_ATTRIBUTE ((unused)),size_t size MY_ATTRIBUTE ((unused)))1134   static void operator delete(void *ptr MY_ATTRIBUTE((unused)),
1135                               size_t size MY_ATTRIBUTE((unused)))
1136   { TRASH(ptr, size); }
1137 
1138   Item *and_level;
1139   Item_func *cmp_func;
COND_CMP(Item * a,Item_func * b)1140   COND_CMP(Item *a,Item_func *b) :and_level(a),cmp_func(b) {}
1141 };
1142 
1143 
1144 /**
1145   Find the multiple equality predicate containing a field.
1146 
1147   The function retrieves the multiple equalities accessed through
1148   the cond_equal structure from current level and up looking for
1149   an equality containing field. It stops retrieval as soon as the equality
1150   is found and set up inherited_fl to TRUE if it's found on upper levels.
1151 
1152   @param cond_equal          multiple equalities to search in
1153   @param field               field to look for
1154   @param[out] inherited_fl   set up to TRUE if multiple equality is found
1155                              on upper levels (not on current level of
1156                              cond_equal)
1157 
1158   @return
1159     - Item_equal for the found multiple equality predicate if a success;
1160     - NULL otherwise.
1161 */
1162 
find_item_equal(COND_EQUAL * cond_equal,Field * field,bool * inherited_fl)1163 Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field,
1164                             bool *inherited_fl)
1165 {
1166   Item_equal *item= 0;
1167   bool in_upper_level= FALSE;
1168   while (cond_equal)
1169   {
1170     List_iterator_fast<Item_equal> li(cond_equal->current_level);
1171     while ((item= li++))
1172     {
1173       if (item->contains(field))
1174         goto finish;
1175     }
1176     in_upper_level= TRUE;
1177     cond_equal= cond_equal->upper_levels;
1178   }
1179   in_upper_level= FALSE;
1180 finish:
1181   *inherited_fl= in_upper_level;
1182   return item;
1183 }
1184 
1185 
1186 /**
1187   Get the best field substitution for a given field.
1188 
1189   If the field is member of a multiple equality, look up that equality
1190   and return the most appropriate field. Usually this is the equivalenced
1191   field belonging to the outer-most table in the join order, but
1192   @see Item_field::get_subst_item() for details.
1193   Otherwise, return the same field.
1194 
1195   @param item_field The field that we are seeking a substitution for.
1196   @param cond_equal multiple equalities to search in
1197 
1198   @return The substituted field.
1199 */
1200 
get_best_field(Item_field * item_field,COND_EQUAL * cond_equal)1201 Item_field *get_best_field(Item_field *item_field, COND_EQUAL *cond_equal)
1202 {
1203   bool dummy;
1204   Item_equal *item_eq= find_item_equal(cond_equal, item_field->field, &dummy);
1205   if (!item_eq)
1206     return item_field;
1207 
1208   return item_eq->get_subst_item(item_field);
1209 }
1210 
1211 
1212 /**
1213   Check whether an equality can be used to build multiple equalities.
1214 
1215     This function first checks whether the equality (left_item=right_item)
1216     is a simple equality i.e. the one that equates a field with another field
1217     or a constant (field=field_item or field=const_item).
1218     If this is the case the function looks for a multiple equality
1219     in the lists referenced directly or indirectly by cond_equal inferring
1220     the given simple equality. If it doesn't find any, it builds a multiple
1221     equality that covers the predicate, i.e. the predicate can be inferred
1222     from this multiple equality.
1223     The built multiple equality could be obtained in such a way:
1224     create a binary  multiple equality equivalent to the predicate, then
1225     merge it, if possible, with one of old multiple equalities.
1226     This guarantees that the set of multiple equalities covering equality
1227     predicates will be minimal.
1228 
1229   EXAMPLE:
1230     For the where condition
1231     @code
1232       WHERE a=b AND b=c AND
1233             (b=2 OR f=e)
1234     @endcode
1235     the check_equality will be called for the following equality
1236     predicates a=b, b=c, b=2 and f=e.
1237     - For a=b it will be called with *cond_equal=(0,[]) and will transform
1238       *cond_equal into (0,[Item_equal(a,b)]).
1239     - For b=c it will be called with *cond_equal=(0,[Item_equal(a,b)])
1240       and will transform *cond_equal into CE=(0,[Item_equal(a,b,c)]).
1241     - For b=2 it will be called with *cond_equal=(ptr(CE),[])
1242       and will transform *cond_equal into (ptr(CE),[Item_equal(2,a,b,c)]).
1243     - For f=e it will be called with *cond_equal=(ptr(CE), [])
1244       and will transform *cond_equal into (ptr(CE),[Item_equal(f,e)]).
1245 
1246   @note
1247     Now only fields that have the same type definitions (verified by
1248     the Field::eq_def method) are placed to the same multiple equalities.
1249     Because of this some equality predicates are not eliminated and
1250     can be used in the constant propagation procedure.
1251     We could weaken the equality test as soon as at least one of the
1252     equal fields is to be equal to a constant. It would require a
1253     more complicated implementation: we would have to store, in
1254     the general case, its own constant for each field from the multiple
1255     equality. But at the same time it would allow us to get rid
1256     of constant propagation completely: it would be done by the call
1257     to build_equal_items_for_cond.
1258 
1259 
1260     The implementation does not follow exactly the above rules to
1261     build a new multiple equality for the equality predicate.
1262     If it processes the equality of the form field1=field2, it
1263     looks for multiple equalities me1 containing field1 and me2 containing
1264     field2. If only one of them is found the function expands it with
1265     the lacking field. If multiple equalities for both fields are
1266     found they are merged. If both searches fail a new multiple equality
1267     containing just field1 and field2 is added to the existing
1268     multiple equalities.
1269     If the function processes the predicate of the form field1=const,
1270     it looks for a multiple equality containing field1. If found, the
1271     function checks the constant of the multiple equality. If the value
1272     is unknown, it is setup to const. Otherwise the value is compared with
1273     const and the evaluation of the equality predicate is performed.
1274     When expanding/merging equality predicates from the upper levels
1275     the function first copies them for the current level. It looks
1276     acceptable, as this happens rarely. The implementation without
1277     copying would be much more complicated.
1278 
1279   @param left_item   left term of the equality to be checked
1280   @param right_item  right term of the equality to be checked
1281   @param item        equality item if the equality originates from a condition
1282                      predicate, 0 if the equality is the result of row
1283                      elimination
1284   @param cond_equal  multiple equalities that must hold together with the
1285                      equality
1286 
1287   @retval
1288     TRUE    if the predicate is a simple equality predicate to be used
1289     for building multiple equalities
1290   @retval
1291     FALSE   otherwise
1292 */
1293 
check_simple_equality(Item * left_item,Item * right_item,Item * item,COND_EQUAL * cond_equal)1294 static bool check_simple_equality(Item *left_item, Item *right_item,
1295                                   Item *item, COND_EQUAL *cond_equal)
1296 {
1297   if (left_item->type() == Item::REF_ITEM &&
1298       ((Item_ref*)left_item)->ref_type() == Item_ref::VIEW_REF)
1299   {
1300     if (((Item_ref*)left_item)->depended_from)
1301       return FALSE;
1302     left_item= left_item->real_item();
1303   }
1304   if (right_item->type() == Item::REF_ITEM &&
1305       ((Item_ref*)right_item)->ref_type() == Item_ref::VIEW_REF)
1306   {
1307     if (((Item_ref*)right_item)->depended_from)
1308       return FALSE;
1309     right_item= right_item->real_item();
1310   }
1311   if (left_item->type() == Item::FIELD_ITEM &&
1312       right_item->type() == Item::FIELD_ITEM &&
1313       !((Item_field*)left_item)->depended_from &&
1314       !((Item_field*)right_item)->depended_from)
1315   {
1316     /* The predicate the form field1=field2 is processed */
1317 
1318     Field *left_field= ((Item_field*) left_item)->field;
1319     Field *right_field= ((Item_field*) right_item)->field;
1320 
1321     if (!left_field->eq_def(right_field))
1322       return FALSE;
1323 
1324     /* Search for multiple equalities containing field1 and/or field2 */
1325     bool left_copyfl, right_copyfl;
1326     Item_equal *left_item_equal=
1327                find_item_equal(cond_equal, left_field, &left_copyfl);
1328     Item_equal *right_item_equal=
1329                find_item_equal(cond_equal, right_field, &right_copyfl);
1330 
1331     /* As (NULL=NULL) != TRUE we can't just remove the predicate f=f */
1332     if (left_field->eq(right_field)) /* f = f */
1333       return (!(left_field->maybe_null() && !left_item_equal));
1334 
1335     if (left_item_equal && left_item_equal == right_item_equal)
1336     {
1337       /*
1338         The equality predicate is inference of one of the existing
1339         multiple equalities, i.e the condition is already covered
1340         by upper level equalities
1341       */
1342        return TRUE;
1343     }
1344 
1345     /* Copy the found multiple equalities at the current level if needed */
1346     if (left_copyfl)
1347     {
1348       /* left_item_equal of an upper level contains left_item */
1349       left_item_equal= new Item_equal(left_item_equal);
1350       cond_equal->current_level.push_back(left_item_equal);
1351     }
1352     if (right_copyfl)
1353     {
1354       /* right_item_equal of an upper level contains right_item */
1355       right_item_equal= new Item_equal(right_item_equal);
1356       cond_equal->current_level.push_back(right_item_equal);
1357     }
1358 
1359     if (left_item_equal)
1360     {
1361       /* left item was found in the current or one of the upper levels */
1362       if (! right_item_equal)
1363         left_item_equal->add((Item_field *) right_item);
1364       else
1365       {
1366         /* Merge two multiple equalities forming a new one */
1367         left_item_equal->merge(right_item_equal);
1368         /* Remove the merged multiple equality from the list */
1369         List_iterator<Item_equal> li(cond_equal->current_level);
1370         while ((li++) != right_item_equal) ;
1371         li.remove();
1372       }
1373     }
1374     else
1375     {
1376       /* left item was not found neither the current nor in upper levels  */
1377       if (right_item_equal)
1378       {
1379         right_item_equal->add((Item_field *) left_item);
1380       }
1381       else
1382       {
1383         /* None of the fields was found in multiple equalities */
1384         Item_equal *item_equal= new Item_equal((Item_field *) left_item,
1385                                                (Item_field *) right_item);
1386         cond_equal->current_level.push_back(item_equal);
1387       }
1388     }
1389     return TRUE;
1390   }
1391 
1392   {
1393     /* The predicate of the form field=const/const=field is processed */
1394     Item *const_item= 0;
1395     Item_field *field_item= 0;
1396     if (left_item->type() == Item::FIELD_ITEM &&
1397         !((Item_field*)left_item)->depended_from &&
1398         right_item->const_item())
1399     {
1400       field_item= (Item_field*) left_item;
1401       const_item= right_item;
1402     }
1403     else if (right_item->type() == Item::FIELD_ITEM &&
1404              !((Item_field*)right_item)->depended_from &&
1405              left_item->const_item())
1406     {
1407       field_item= (Item_field*) right_item;
1408       const_item= left_item;
1409     }
1410 
1411     if (const_item &&
1412         field_item->result_type() == const_item->result_type())
1413     {
1414       bool copyfl;
1415 
1416       if (field_item->result_type() == STRING_RESULT)
1417       {
1418         const CHARSET_INFO *cs= field_item->field->charset();
1419         if (!item)
1420         {
1421           Item_func_eq *eq_item;
1422           if (!(eq_item= new Item_func_eq(left_item, right_item)) ||
1423               eq_item->set_cmp_func())
1424             return FALSE;
1425           eq_item->quick_fix_field();
1426           item= eq_item;
1427         }
1428         if ((cs != ((Item_func *) item)->compare_collation()) ||
1429             !cs->coll->propagate(cs, 0, 0))
1430           return FALSE;
1431       }
1432 
1433       Item_equal *item_equal = find_item_equal(cond_equal,
1434                                                field_item->field, &copyfl);
1435       if (copyfl)
1436       {
1437         item_equal= new Item_equal(item_equal);
1438         cond_equal->current_level.push_back(item_equal);
1439       }
1440       if (item_equal)
1441       {
1442         /*
1443           The flag cond_false will be set to 1 after this, if item_equal
1444           already contains a constant and its value is  not equal to
1445           the value of const_item.
1446         */
1447         item_equal->add(const_item, field_item);
1448       }
1449       else
1450       {
1451         item_equal= new Item_equal(const_item, field_item);
1452         cond_equal->current_level.push_back(item_equal);
1453       }
1454       return TRUE;
1455     }
1456   }
1457   return FALSE;
1458 }
1459 
1460 
1461 /**
1462   Convert row equalities into a conjunction of regular equalities.
1463 
1464     The function converts a row equality of the form (E1,...,En)=(E'1,...,E'n)
1465     into a list of equalities E1=E'1,...,En=E'n. For each of these equalities
1466     Ei=E'i the function checks whether it is a simple equality or a row
1467     equality. If it is a simple equality it is used to expand multiple
1468     equalities of cond_equal. If it is a row equality it is converted to a
1469     sequence of equalities between row elements. If Ei=E'i is neither a
1470     simple equality nor a row equality the item for this predicate is added
1471     to eq_list.
1472 
1473   @param thd        thread handle
1474   @param left_row   left term of the row equality to be processed
1475   @param right_row  right term of the row equality to be processed
1476   @param cond_equal multiple equalities that must hold together with the
1477                     predicate
1478   @param eq_list    results of conversions of row equalities that are not
1479                     simple enough to form multiple equalities
1480 
1481   @retval
1482     TRUE    if conversion has succeeded (no fatal error)
1483   @retval
1484     FALSE   otherwise
1485 */
1486 
check_row_equality(THD * thd,Item * left_row,Item_row * right_row,COND_EQUAL * cond_equal,List<Item> * eq_list)1487 static bool check_row_equality(THD *thd, Item *left_row, Item_row *right_row,
1488                                COND_EQUAL *cond_equal, List<Item>* eq_list)
1489 {
1490   uint n= left_row->cols();
1491   for (uint i= 0 ; i < n; i++)
1492   {
1493     bool is_converted;
1494     Item *left_item= left_row->element_index(i);
1495     Item *right_item= right_row->element_index(i);
1496     if (left_item->type() == Item::ROW_ITEM &&
1497         right_item->type() == Item::ROW_ITEM)
1498     {
1499       is_converted= check_row_equality(thd,
1500                                        (Item_row *) left_item,
1501                                        (Item_row *) right_item,
1502 			               cond_equal, eq_list);
1503       if (!is_converted)
1504         thd->lex->current_select->cond_count++;
1505     }
1506     else
1507     {
1508       is_converted= check_simple_equality(left_item, right_item, 0, cond_equal);
1509       thd->lex->current_select->cond_count++;
1510     }
1511 
1512     if (!is_converted)
1513     {
1514       Item_func_eq *eq_item;
1515       if (!(eq_item= new Item_func_eq(left_item, right_item)) ||
1516           eq_item->set_cmp_func())
1517         return FALSE;
1518       eq_item->quick_fix_field();
1519       eq_list->push_back(eq_item);
1520     }
1521   }
1522   return TRUE;
1523 }
1524 
1525 
1526 /**
1527   Eliminate row equalities and form multiple equalities predicates.
1528 
1529     This function checks whether the item is a simple equality
1530     i.e. the one that equates a field with another field or a constant
1531     (field=field_item or field=constant_item), or, a row equality.
1532     For a simple equality the function looks for a multiple equality
1533     in the lists referenced directly or indirectly by cond_equal inferring
1534     the given simple equality. If it doesn't find any, it builds/expands
1535     multiple equality that covers the predicate.
1536     Row equalities are eliminated by substituting them with conjunctions
1537     of regular equalities, which are treated in the same way as original
1538     equality predicates.
1539 
1540   @param thd        thread handle
1541   @param item       predicate to process
1542   @param cond_equal multiple equalities that must hold together with the
1543                     predicate
1544   @param eq_list    results of conversions of row equalities that are not
1545                     simple enough to form multiple equalities
1546 
1547   @retval
1548     TRUE   if re-writing rules have been applied
1549   @retval
1550     FALSE  otherwise, i.e.
1551            if the predicate is not an equality,
1552            or, if the equality is neither a simple one nor a row equality,
1553            or, if the procedure fails by a fatal error.
1554 
1555   @note If the equality was created by IN->EXISTS, it may be removed later by
1556   subquery materialization. So we don't mix this possibly temporary equality
1557   with others; if we let it go into a multiple-equality (Item_equal), then we
1558   could not remove it later. There is however an exception: if the outer
1559   expression is a constant, it is safe to leave the equality even in
1560   materialization; all it can do is preventing NULL/FALSE distinction but if
1561   such distinction mattered the equality would be in a triggered condition so
1562   we would not come to this function. And injecting constants is good because
1563   it makes the materialized table smaller.
1564 */
1565 
check_equality(THD * thd,Item * item,COND_EQUAL * cond_equal,List<Item> * eq_list)1566 static bool check_equality(THD *thd, Item *item, COND_EQUAL *cond_equal,
1567                            List<Item> *eq_list)
1568 {
1569   if (item->type() == Item::FUNC_ITEM &&
1570          ((Item_func*) item)->functype() == Item_func::EQ_FUNC)
1571   {
1572     Item *left_item= ((Item_func*) item)->arguments()[0];
1573     Item *right_item= ((Item_func*) item)->arguments()[1];
1574 
1575     if (item->created_by_in2exists() && !left_item->const_item())
1576       return false;                             // See note above
1577 
1578     if (left_item->type() == Item::ROW_ITEM &&
1579         right_item->type() == Item::ROW_ITEM)
1580     {
1581       thd->lex->current_select->cond_count--;
1582       return check_row_equality(thd,
1583                                 (Item_row *) left_item,
1584                                 (Item_row *) right_item,
1585                                 cond_equal, eq_list);
1586     }
1587     else
1588       return check_simple_equality(left_item, right_item, item, cond_equal);
1589   }
1590 
1591   return FALSE;
1592 }
1593 
1594 
1595 /**
1596   Replace all equality predicates in a condition by multiple equality items.
1597 
1598     At each 'and' level the function detects items for equality predicates
1599     and replaced them by a set of multiple equality items of class Item_equal,
1600     taking into account inherited equalities from upper levels.
1601     If an equality predicate is used not in a conjunction it's just
1602     replaced by a multiple equality predicate.
1603     For each 'and' level the function set a pointer to the inherited
1604     multiple equalities in the cond_equal field of the associated
1605     object of the type Item_cond_and.
1606     The function also traverses the cond tree and for each field reference
1607     sets a pointer to the multiple equality item containing the field, if there
1608     is any. If this multiple equality equates fields to a constant the
1609     function replaces the field reference by the constant in the cases
1610     when the field is not of a string type or when the field reference is
1611     just an argument of a comparison predicate.
1612     The function also determines the maximum number of members in
1613     equality lists of each Item_cond_and object assigning it to
1614     thd->lex->current_select->max_equal_elems.
1615 
1616   @note
1617     Multiple equality predicate =(f1,..fn) is equivalent to the conjunction of
1618     f1=f2, .., fn-1=fn. It substitutes any inference from these
1619     equality predicates that is equivalent to the conjunction.
1620     Thus, =(a1,a2,a3) can substitute for ((a1=a3) AND (a2=a3) AND (a2=a1)) as
1621     it is equivalent to ((a1=a2) AND (a2=a3)).
1622     The function always makes a substitution of all equality predicates occurring
1623     in a conjunction for a minimal set of multiple equality predicates.
1624     This set can be considered as a canonical representation of the
1625     sub-conjunction of the equality predicates.
1626     E.g. (t1.a=t2.b AND t2.b>5 AND t1.a=t3.c) is replaced by
1627     (=(t1.a,t2.b,t3.c) AND t2.b>5), not by
1628     (=(t1.a,t2.b) AND =(t1.a,t3.c) AND t2.b>5);
1629     while (t1.a=t2.b AND t2.b>5 AND t3.c=t4.d) is replaced by
1630     (=(t1.a,t2.b) AND =(t3.c,t4.d) AND t2.b>5),
1631     but if additionally =(t4.d,t2.b) is inherited, it
1632     will be replaced by (=(t1.a,t2.b,t3.c,t4.d) AND t2.b>5)
1633 
1634     The function performs the substitution in a recursive descent by
1635     the condition tree, passing to the next AND level a chain of multiple
1636     equality predicates which have been built at the upper levels.
1637     The Item_equal items built at the level are attached to other
1638     non-equality conjuncts as a sublist. The pointer to the inherited
1639     multiple equalities is saved in the and condition object (Item_cond_and).
1640     This chain allows us, for any field reference occurrence, to easily find a
1641     multiple equality that must hold for this occurrence.
1642     For each AND level we do the following:
1643     - scan it for all equality predicate (=) items
1644     - join them into disjoint Item_equal() groups
1645     - process the included OR conditions recursively to do the same for
1646       lower AND levels.
1647 
1648     We need to do things in this order as lower AND levels need to know about
1649     all possible Item_equal objects in upper levels.
1650 
1651   @param thd        thread handle
1652   @param cond       condition(expression) where to make replacement
1653   @param inherited  path to all inherited multiple equality items
1654   @param do_inherit whether or not to inherit equalities from other
1655                     parts of the condition
1656 
1657   @return
1658     pointer to the transformed condition
1659 */
1660 
build_equal_items_for_cond(THD * thd,Item * cond,COND_EQUAL * inherited,bool do_inherit)1661 static Item *build_equal_items_for_cond(THD *thd, Item *cond,
1662                                         COND_EQUAL *inherited,
1663                                         bool do_inherit)
1664 {
1665   Item_equal *item_equal;
1666   COND_EQUAL cond_equal;
1667   cond_equal.upper_levels= inherited;
1668 
1669   if (check_stack_overrun(thd, STACK_MIN_SIZE, NULL))
1670     return cond;
1671 
1672   if (cond->type() == Item::COND_ITEM)
1673   {
1674     List<Item> eq_list;
1675     bool and_level= ((Item_cond*) cond)->functype() ==
1676       Item_func::COND_AND_FUNC;
1677     List<Item> *args= ((Item_cond*) cond)->argument_list();
1678 
1679     List_iterator<Item> li(*args);
1680     Item *item;
1681 
1682     if (and_level)
1683     {
1684       /*
1685          Retrieve all conjuncts of this level detecting the equality
1686          that are subject to substitution by multiple equality items and
1687          removing each such predicate from the conjunction after having
1688          found/created a multiple equality whose inference the predicate is.
1689      */
1690       while ((item= li++))
1691       {
1692         /*
1693           PS/SP note: we can safely remove a node from AND-OR
1694           structure here because it's restored before each
1695           re-execution of any prepared statement/stored procedure.
1696         */
1697         if (check_equality(thd, item, &cond_equal, &eq_list))
1698           li.remove();
1699       }
1700 
1701       /*
1702         Check if we eliminated all the predicates of the level, e.g.
1703         (a=a AND b=b AND a=a).
1704       */
1705       if (!args->elements &&
1706           !cond_equal.current_level.elements &&
1707           !eq_list.elements)
1708         return new Item_int((longlong) 1, 1);
1709 
1710       List_iterator_fast<Item_equal> it(cond_equal.current_level);
1711       while ((item_equal= it++))
1712       {
1713         item_equal->fix_length_and_dec();
1714         item_equal->update_used_tables();
1715         set_if_bigger(thd->lex->current_select->max_equal_elems,
1716                       item_equal->members());
1717       }
1718 
1719       ((Item_cond_and*)cond)->cond_equal= cond_equal;
1720       inherited= &(((Item_cond_and*)cond)->cond_equal);
1721     }
1722     /*
1723        Make replacement of equality predicates for lower levels
1724        of the condition expression.
1725     */
1726     li.rewind();
1727     while ((item= li++))
1728     {
1729       Item *new_item=
1730         build_equal_items_for_cond(thd, item, inherited, do_inherit);
1731       if (new_item != item)
1732       {
1733         /* This replacement happens only for standalone equalities */
1734         /*
1735           This is ok with PS/SP as the replacement is done for
1736           arguments of an AND/OR item, which are restored for each
1737           execution of PS/SP.
1738         */
1739         li.replace(new_item);
1740       }
1741     }
1742     if (and_level)
1743     {
1744       args->concat(&eq_list);
1745       args->concat((List<Item> *)&cond_equal.current_level);
1746     }
1747   }
1748   else if (cond->type() == Item::FUNC_ITEM)
1749   {
1750     List<Item> eq_list;
1751     /*
1752       If an equality predicate forms the whole and level,
1753       we call it standalone equality and it's processed here.
1754       E.g. in the following where condition
1755       WHERE a=5 AND (b=5 or a=c)
1756       (b=5) and (a=c) are standalone equalities.
1757       In general we can't leave alone standalone eqalities:
1758       for WHERE a=b AND c=d AND (b=c OR d=5)
1759       b=c is replaced by =(a,b,c,d).
1760      */
1761     if (check_equality(thd, cond, &cond_equal, &eq_list))
1762     {
1763       int n= cond_equal.current_level.elements + eq_list.elements;
1764       if (n == 0)
1765         return new Item_int((longlong) 1,1);
1766       else if (n == 1)
1767       {
1768         if ((item_equal= cond_equal.current_level.pop()))
1769         {
1770           item_equal->fix_length_and_dec();
1771           item_equal->update_used_tables();
1772           set_if_bigger(thd->lex->current_select->max_equal_elems,
1773                         item_equal->members());
1774           return item_equal;
1775 	}
1776 
1777         return eq_list.pop();
1778       }
1779       else
1780       {
1781         /*
1782           Here a new AND level must be created. It can happen only
1783           when a row equality is processed as a standalone predicate.
1784 	*/
1785         Item_cond_and *and_cond= new Item_cond_and(eq_list);
1786         and_cond->quick_fix_field();
1787         List<Item> *args= and_cond->argument_list();
1788         List_iterator_fast<Item_equal> it(cond_equal.current_level);
1789         while ((item_equal= it++))
1790         {
1791           item_equal->fix_length_and_dec();
1792           item_equal->update_used_tables();
1793           set_if_bigger(thd->lex->current_select->max_equal_elems,
1794                         item_equal->members());
1795         }
1796         and_cond->cond_equal= cond_equal;
1797         args->concat((List<Item> *)&cond_equal.current_level);
1798 
1799         return and_cond;
1800       }
1801     }
1802 
1803     if (do_inherit)
1804     {
1805       /*
1806         For each field reference in cond, not from equal item predicates,
1807         set a pointer to the multiple equality it belongs to (if there is any)
1808         as soon the field is not of a string type or the field reference is
1809         an argument of a comparison predicate.
1810       */
1811       uchar *is_subst_valid= (uchar *) 1;
1812       cond= cond->compile(&Item::subst_argument_checker,
1813                           &is_subst_valid,
1814                           &Item::equal_fields_propagator,
1815                           (uchar *) inherited);
1816     }
1817     cond->update_used_tables();
1818   }
1819   return cond;
1820 }
1821 
1822 
1823 /**
1824   Build multiple equalities for a condition and all on expressions that
1825   inherit these multiple equalities.
1826 
1827     The function first applies the build_equal_items_for_cond function
1828     to build all multiple equalities for condition cond utilizing equalities
1829     referred through the parameter inherited. The extended set of
1830     equalities is returned in the structure referred by the cond_equal_ref
1831     parameter. After this the function calls itself recursively for
1832     all on expressions whose direct references can be found in join_list
1833     and who inherit directly the multiple equalities just having built.
1834 
1835   @note
1836     The on expression used in an outer join operation inherits all equalities
1837     from the on expression of the embedding join, if there is any, or
1838     otherwise - from the where condition.
1839     This fact is not obvious, but presumably can be proved.
1840     Consider the following query:
1841     @code
1842       SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t1.a=t3.a AND t2.a=t4.a
1843         WHERE t1.a=t2.a;
1844     @endcode
1845     If the on expression in the query inherits =(t1.a,t2.a), then we
1846     can build the multiple equality =(t1.a,t2.a,t3.a,t4.a) that infers
1847     the equality t3.a=t4.a. Although the on expression
1848     t1.a=t3.a AND t2.a=t4.a AND t3.a=t4.a is not equivalent to the one
1849     in the query the latter can be replaced by the former: the new query
1850     will return the same result set as the original one.
1851 
1852     Interesting that multiple equality =(t1.a,t2.a,t3.a,t4.a) allows us
1853     to use t1.a=t3.a AND t3.a=t4.a under the on condition:
1854     @code
1855       SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t1.a=t3.a AND t3.a=t4.a
1856         WHERE t1.a=t2.a
1857     @endcode
1858     This query is equivalent to:
1859     @code
1860       SELECT * FROM (t1 LEFT JOIN (t3,t4) ON t1.a=t3.a AND t3.a=t4.a),t2
1861         WHERE t1.a=t2.a
1862     @endcode
1863     Similarly the original query can be rewritten to the query:
1864     @code
1865       SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t2.a=t4.a AND t3.a=t4.a
1866         WHERE t1.a=t2.a
1867     @endcode
1868     that is equivalent to:
1869     @code
1870       SELECT * FROM (t2 LEFT JOIN (t3,t4)ON t2.a=t4.a AND t3.a=t4.a), t1
1871         WHERE t1.a=t2.a
1872     @endcode
1873     Thus, applying equalities from the where condition we basically
1874     can get more freedom in performing join operations.
1875     Although we don't use this property now, it probably makes sense to use
1876     it in the future.
1877   @param thd		      Thread handler
1878   @param cond                condition to build the multiple equalities for
1879   @param inherited           path to all inherited multiple equality items
1880   @param do_inherit          whether or not to inherit equalities from other
1881                              parts of the condition
1882   @param join_list           list of join tables to which the condition
1883                              refers to
1884   @param[out] cond_equal_ref pointer to the structure to place built
1885                              equalities in
1886 
1887   @return
1888     pointer to the transformed condition containing multiple equalities
1889 */
1890 
build_equal_items(THD * thd,Item * cond,COND_EQUAL * inherited,bool do_inherit,List<TABLE_LIST> * join_list,COND_EQUAL ** cond_equal_ref)1891 Item *build_equal_items(THD *thd, Item *cond, COND_EQUAL *inherited,
1892                         bool do_inherit, List<TABLE_LIST> *join_list,
1893                         COND_EQUAL **cond_equal_ref)
1894 {
1895   COND_EQUAL *cond_equal= 0;
1896 
1897   if (cond)
1898   {
1899     cond= build_equal_items_for_cond(thd, cond, inherited, do_inherit);
1900     cond->update_used_tables();
1901     if (cond->type() == Item::COND_ITEM &&
1902         ((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
1903       cond_equal= &((Item_cond_and*) cond)->cond_equal;
1904     else if (cond->type() == Item::FUNC_ITEM &&
1905              ((Item_cond*) cond)->functype() == Item_func::MULT_EQUAL_FUNC)
1906     {
1907       cond_equal= new COND_EQUAL;
1908       cond_equal->current_level.push_back((Item_equal *) cond);
1909     }
1910   }
1911   if (cond_equal)
1912   {
1913     cond_equal->upper_levels= inherited;
1914     inherited= cond_equal;
1915   }
1916   *cond_equal_ref= cond_equal;
1917 
1918   if (join_list)
1919   {
1920     TABLE_LIST *table;
1921     List_iterator<TABLE_LIST> li(*join_list);
1922 
1923     while ((table= li++))
1924     {
1925       if (table->join_cond())
1926       {
1927         List<TABLE_LIST> *nested_join_list= table->nested_join ?
1928           &table->nested_join->join_list : NULL;
1929         /*
1930           We can modify table->join_cond() because its old value will
1931           be restored before re-execution of PS/SP.
1932         */
1933         table->set_join_cond(build_equal_items(thd, table->join_cond(),
1934                                                inherited, do_inherit,
1935                                                nested_join_list,
1936                                                &table->cond_equal));
1937       }
1938     }
1939   }
1940 
1941   return cond;
1942 }
1943 
1944 
1945 /**
1946   Compare field items by table order in the execution plan.
1947 
1948     field1 considered as better than field2 if the table containing
1949     field1 is accessed earlier than the table containing field2.
1950     The function finds out which of the two fields is better according
1951     to this criterion.
1952 
1953   @param field1          first field item to compare
1954   @param field2          second field item to compare
1955   @param table_join_idx  index to tables determining table order
1956 
1957   @retval
1958    -1  if field1 is better than field2
1959   @retval
1960     1  if field2 is better than field1
1961   @retval
1962     0  otherwise
1963 */
1964 
compare_fields_by_table_order(Item_field * field1,Item_field * field2,void * table_join_idx)1965 static int compare_fields_by_table_order(Item_field *field1,
1966                                   Item_field *field2,
1967                                   void *table_join_idx)
1968 {
1969   int cmp= 0;
1970   bool outer_ref= 0;
1971   if (field1->used_tables() & OUTER_REF_TABLE_BIT)
1972   {
1973     outer_ref= 1;
1974     cmp= -1;
1975   }
1976   if (field2->used_tables() & OUTER_REF_TABLE_BIT)
1977   {
1978     outer_ref= 1;
1979     cmp++;
1980   }
1981   if (outer_ref)
1982     return cmp;
1983   JOIN_TAB **idx= (JOIN_TAB **) table_join_idx;
1984 
1985   /*
1986     idx is NULL if this function was not called from JOIN::optimize()
1987     but from e.g. mysql_delete() or mysql_update(). In these cases
1988     there is only one table and both fields belong to it. Example
1989     condition where this is the case: t1.fld1=t1.fld2
1990   */
1991   if (!idx)
1992     return 0;
1993 
1994   cmp= idx[field1->field->table->tablenr]-idx[field2->field->table->tablenr];
1995   return cmp < 0 ? -1 : (cmp ? 1 : 0);
1996 }
1997 
1998 
1999 /**
2000   Generate minimal set of simple equalities equivalent to a multiple equality.
2001 
2002     The function retrieves the fields of the multiple equality item
2003     item_equal and  for each field f:
2004     - if item_equal contains const it generates the equality f=const_item;
2005     - otherwise, if f is not the first field, generates the equality
2006       f=item_equal->get_first().
2007     All generated equality are added to the cond conjunction.
2008 
2009   @param cond            condition to add the generated equality to
2010   @param upper_levels    structure to access multiple equality of upper levels
2011   @param item_equal      multiple equality to generate simple equality from
2012 
2013   @note
2014     Before generating an equality function checks that it has not
2015     been generated for multiple equalities of the upper levels.
2016     E.g. for the following where condition
2017     WHERE a=5 AND ((a=b AND b=c) OR  c>4)
2018     the upper level AND condition will contain =(5,a),
2019     while the lower level AND condition will contain =(5,a,b,c).
2020     When splitting =(5,a,b,c) into a separate equality predicates
2021     we should omit 5=a, as we have it already in the upper level.
2022     The following where condition gives us a more complicated case:
2023     WHERE t1.a=t2.b AND t3.c=t4.d AND (t2.b=t3.c OR t4.e>5 ...) AND ...
2024     Given the tables are accessed in the order t1->t2->t3->t4 for
2025     the selected query execution plan the lower level multiple
2026     equality =(t1.a,t2.b,t3.c,t4.d) formally  should be converted to
2027     t1.a=t2.b AND t1.a=t3.c AND t1.a=t4.d. But t1.a=t2.b will be
2028     generated for the upper level. Also t3.c=t4.d will be generated there.
2029     So only t1.a=t3.c should be left in the lower level.
2030     If cond is equal to 0, then no more than one equality is generated
2031     and a pointer to it is returned as the result of the function.
2032 
2033   @return
2034     - The condition with generated simple equalities or
2035     a pointer to the simple generated equality, if success.
2036     - 0, otherwise.
2037 */
2038 
eliminate_item_equal(Item * cond,COND_EQUAL * upper_levels,Item_equal * item_equal)2039 static Item *eliminate_item_equal(Item *cond, COND_EQUAL *upper_levels,
2040                                   Item_equal *item_equal)
2041 {
2042   List<Item> eq_list;
2043   Item_func_eq *eq_item= NULL;
2044   if (((Item *) item_equal)->const_item() && !item_equal->val_int())
2045     return new Item_int((longlong) 0,1);
2046   Item *const item_const= item_equal->get_const();
2047   Item_equal_iterator it(*item_equal);
2048   if (!item_const)
2049   {
2050     /*
2051       If there is a const item, match all field items with the const item,
2052       otherwise match the second and subsequent field items with the first one:
2053     */
2054     it++;
2055   }
2056   Item_field *item_field; // Field to generate equality for.
2057   while ((item_field= it++))
2058   {
2059     /*
2060       Generate an equality of the form:
2061       item_field = some previous field in item_equal's list.
2062 
2063       First see if we really need to generate it:
2064     */
2065     Item_equal *const upper= item_field->find_item_equal(upper_levels);
2066     if (upper) // item_field is in this upper equality
2067     {
2068       if (item_const && upper->get_const())
2069         continue; // Const at both levels, no need to generate at current level
2070       /*
2071         If the upper-level multiple equality contains this item, there is no
2072         need to generate the equality, unless item_field belongs to a
2073         semi-join nest that is used for Materialization, and refers to tables
2074         that are outside of the materialized semi-join nest,
2075         As noted in Item_equal::get_subst_item(), subquery materialization
2076         does not have this problem.
2077       */
2078       JOIN_TAB *const tab= item_field->field->table->reginfo.join_tab;
2079 
2080       if (!(tab && sj_is_materialize_strategy(tab->get_sj_strategy())))
2081       {
2082         Item_field *item_match;
2083         Item_equal_iterator li(*item_equal);
2084         while ((item_match= li++) != item_field)
2085         {
2086           if (item_match->find_item_equal(upper_levels) == upper)
2087             break; // (item_match, item_field) is also in upper level equality
2088         }
2089         if (item_match != item_field)
2090           continue;
2091       }
2092     } // ... if (upper).
2093 
2094     /*
2095       item_field should be compared with the head of the multiple equality
2096       list.
2097       item_field may refer to a table that is within a semijoin materialization
2098       nest. In that case, the order of the join_tab entries may look like:
2099 
2100         ot1 ot2 <subquery> ot5 SJM(it3 it4)
2101 
2102       If we have a multiple equality
2103 
2104         (ot1.c1, ot2.c2, <subquery>.c it3.c3, it4.c4, ot5.c5),
2105 
2106       we should generate the following equalities:
2107         1. ot1.c1 = ot2.c2
2108         2. ot1.c1 = <subquery>.c
2109         3. it3.c3 = it4.c4
2110         4. ot1.c1 = ot5.c5
2111 
2112       Equalities 1) and 4) are regular equalities between two outer tables.
2113       Equality 2) is an equality that matches the outer query with a
2114       materialized temporary table. It is either performed as a lookup
2115       into the materialized table (SJM-lookup), or as a condition on the
2116       outer table (SJM-scan).
2117       Equality 3) is evaluated during semijoin materialization.
2118 
2119       If there is a const item, match against this one.
2120       Otherwise, match against the first field item in the multiple equality,
2121       unless the item is within a materialized semijoin nest, in case it will
2122       be matched against the first item within the SJM nest.
2123       @see JOIN::set_access_methods()
2124       @see JOIN::set_prefix_tables()
2125       @see Item_equal::get_subst_item()
2126     */
2127 
2128     Item *const head=
2129       item_const ? item_const : item_equal->get_subst_item(item_field);
2130     if (head == item_field)
2131       continue;
2132 
2133     // we have a pair, can generate 'item_field=head'
2134     if (eq_item)
2135       eq_list.push_back(eq_item);
2136 
2137     eq_item= new Item_func_eq(item_field, head);
2138     if (!eq_item || eq_item->set_cmp_func())
2139       return NULL;
2140     eq_item->quick_fix_field();
2141   } // ... while ((item_field= it++))
2142 
2143   if (!cond && !eq_list.head())
2144   {
2145     if (!eq_item)
2146       return new Item_int((longlong) 1,1);
2147     return eq_item;
2148   }
2149 
2150   if (eq_item)
2151     eq_list.push_back(eq_item);
2152   if (!cond)
2153     cond= new Item_cond_and(eq_list);
2154   else
2155   {
2156     DBUG_ASSERT(cond->type() == Item::COND_ITEM);
2157     if (eq_list.elements)
2158       ((Item_cond *) cond)->add_at_head(&eq_list);
2159   }
2160 
2161   cond->quick_fix_field();
2162   cond->update_used_tables();
2163 
2164   return cond;
2165 }
2166 
2167 
2168 /**
2169   Substitute every field reference in a condition by the best equal field
2170   and eliminate all multiple equality predicates.
2171 
2172     The function retrieves the cond condition and for each encountered
2173     multiple equality predicate it sorts the field references in it
2174     according to the order of tables specified by the table_join_idx
2175     parameter. Then it eliminates the multiple equality predicate by
2176     replacing it with the conjunction of simple equality predicates
2177     equating every field from the multiple equality to the first
2178     field in it, or to the constant, if there is any.
2179     After this the function retrieves all other conjuncted
2180     predicates and substitutes every field reference by the field reference
2181     to the first equal field or equal constant if there are any.
2182 
2183   @param cond            condition to process
2184   @param cond_equal      multiple equalities to take into consideration
2185   @param table_join_idx  index to tables determining field preference
2186 
2187   @note
2188     At the first glance full sort of fields in multiple equality
2189     seems to be an overkill. Yet it's not the case due to possible
2190     new fields in multiple equality item of lower levels. We want
2191     the order in them to comply with the order of upper levels.
2192 
2193   @return
2194     The transformed condition, or NULL in case of error
2195 */
2196 
substitute_for_best_equal_field(Item * cond,COND_EQUAL * cond_equal,void * table_join_idx)2197 Item* substitute_for_best_equal_field(Item *cond,
2198                                       COND_EQUAL *cond_equal,
2199                                       void *table_join_idx)
2200 {
2201   Item_equal *item_equal;
2202 
2203   if (cond->type() == Item::COND_ITEM)
2204   {
2205     List<Item> *cond_list= ((Item_cond*) cond)->argument_list();
2206 
2207     bool and_level= ((Item_cond*) cond)->functype() ==
2208                       Item_func::COND_AND_FUNC;
2209     if (and_level)
2210     {
2211       cond_equal= &((Item_cond_and *) cond)->cond_equal;
2212       cond_list->disjoin((List<Item> *) &cond_equal->current_level);
2213 
2214       List_iterator_fast<Item_equal> it(cond_equal->current_level);
2215       while ((item_equal= it++))
2216       {
2217         item_equal->sort(&compare_fields_by_table_order, table_join_idx);
2218       }
2219     }
2220 
2221     List_iterator<Item> li(*cond_list);
2222     Item *item;
2223     while ((item= li++))
2224     {
2225       Item *new_item =substitute_for_best_equal_field(item, cond_equal,
2226                                                       table_join_idx);
2227       /*
2228         This works OK with PS/SP re-execution as changes are made to
2229         the arguments of AND/OR items only
2230       */
2231       if (new_item != item)
2232         li.replace(new_item);
2233     }
2234 
2235     if (and_level)
2236     {
2237       List_iterator_fast<Item_equal> it(cond_equal->current_level);
2238       while ((item_equal= it++))
2239       {
2240         cond= eliminate_item_equal(cond, cond_equal->upper_levels, item_equal);
2241         if (cond == NULL)
2242           return NULL;
2243         // This occurs when eliminate_item_equal() founds that cond is
2244         // always false and substitutes it with Item_int 0.
2245         // Due to this, value of item_equal will be 0, so just return it.
2246         if (cond->type() != Item::COND_ITEM)
2247           break;
2248       }
2249     }
2250     if (cond->type() == Item::COND_ITEM &&
2251         !((Item_cond*)cond)->argument_list()->elements)
2252       cond= new Item_int((int32)cond->val_bool());
2253 
2254   }
2255   else if (cond->type() == Item::FUNC_ITEM &&
2256            ((Item_cond*) cond)->functype() == Item_func::MULT_EQUAL_FUNC)
2257   {
2258     item_equal= (Item_equal *) cond;
2259     item_equal->sort(&compare_fields_by_table_order, table_join_idx);
2260     if (cond_equal && cond_equal->current_level.head() == item_equal)
2261       cond_equal= cond_equal->upper_levels;
2262     return eliminate_item_equal(0, cond_equal, item_equal);
2263   }
2264   else
2265     cond->transform(&Item::replace_equal_field, 0);
2266   return cond;
2267 }
2268 
2269 
2270 /*
2271   change field = field to field = const for each found field = const in the
2272   and_level
2273 */
2274 
2275 static void
change_cond_ref_to_const(THD * thd,I_List<COND_CMP> * save_list,Item * and_father,Item * cond,Item * field,Item * value)2276 change_cond_ref_to_const(THD *thd, I_List<COND_CMP> *save_list,
2277                          Item *and_father, Item *cond,
2278                          Item *field, Item *value)
2279 {
2280   if (cond->type() == Item::COND_ITEM)
2281   {
2282     bool and_level= ((Item_cond*) cond)->functype() ==
2283       Item_func::COND_AND_FUNC;
2284     List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
2285     Item *item;
2286     while ((item=li++))
2287       change_cond_ref_to_const(thd, save_list,and_level ? cond : item, item,
2288 			       field, value);
2289     return;
2290   }
2291   if (cond->eq_cmp_result() == Item::COND_OK)
2292     return;					// Not a boolean function
2293 
2294   Item_bool_func2 *func=  (Item_bool_func2*) cond;
2295   Item **args= func->arguments();
2296   Item *left_item=  args[0];
2297   Item *right_item= args[1];
2298   Item_func::Functype functype=  func->functype();
2299 
2300   if (right_item->eq(field,0) && left_item != value &&
2301       right_item->cmp_context == field->cmp_context &&
2302       (left_item->result_type() != STRING_RESULT ||
2303        value->result_type() != STRING_RESULT ||
2304        left_item->collation.collation == value->collation.collation))
2305   {
2306     Item *tmp=value->clone_item();
2307     if (tmp)
2308     {
2309       tmp->collation.set(right_item->collation);
2310       thd->change_item_tree(args + 1, tmp);
2311       func->update_used_tables();
2312       if ((functype == Item_func::EQ_FUNC || functype == Item_func::EQUAL_FUNC)
2313 	  && and_father != cond && !left_item->const_item())
2314       {
2315 	cond->marker=1;
2316 	COND_CMP *tmp2;
2317 	if ((tmp2=new COND_CMP(and_father,func)))
2318 	  save_list->push_back(tmp2);
2319       }
2320       func->set_cmp_func();
2321     }
2322   }
2323   else if (left_item->eq(field,0) && right_item != value &&
2324            left_item->cmp_context == field->cmp_context &&
2325            (right_item->result_type() != STRING_RESULT ||
2326             value->result_type() != STRING_RESULT ||
2327             right_item->collation.collation == value->collation.collation))
2328   {
2329     Item *tmp= value->clone_item();
2330     if (tmp)
2331     {
2332       tmp->collation.set(left_item->collation);
2333       thd->change_item_tree(args, tmp);
2334       value= tmp;
2335       func->update_used_tables();
2336       if ((functype == Item_func::EQ_FUNC || functype == Item_func::EQUAL_FUNC)
2337 	  && and_father != cond && !right_item->const_item())
2338       {
2339         args[0]= args[1];                       // For easy check
2340         thd->change_item_tree(args + 1, value);
2341 	cond->marker=1;
2342 	COND_CMP *tmp2;
2343 	if ((tmp2=new COND_CMP(and_father,func)))
2344 	  save_list->push_back(tmp2);
2345       }
2346       func->set_cmp_func();
2347     }
2348   }
2349 }
2350 
2351 static void
propagate_cond_constants(THD * thd,I_List<COND_CMP> * save_list,Item * and_father,Item * cond)2352 propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list,
2353                          Item *and_father, Item *cond)
2354 {
2355   if (cond->type() == Item::COND_ITEM)
2356   {
2357     bool and_level= ((Item_cond*) cond)->functype() ==
2358       Item_func::COND_AND_FUNC;
2359     List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
2360     Item *item;
2361     I_List<COND_CMP> save;
2362     while ((item=li++))
2363     {
2364       propagate_cond_constants(thd, &save,and_level ? cond : item, item);
2365     }
2366     if (and_level)
2367     {						// Handle other found items
2368       I_List_iterator<COND_CMP> cond_itr(save);
2369       COND_CMP *cond_cmp;
2370       while ((cond_cmp=cond_itr++))
2371       {
2372         Item **args= cond_cmp->cmp_func->arguments();
2373         if (!args[0]->const_item())
2374           change_cond_ref_to_const(thd, &save,cond_cmp->and_level,
2375                                    cond_cmp->and_level, args[0], args[1]);
2376       }
2377     }
2378   }
2379   else if (and_father != cond && !cond->marker)		// In a AND group
2380   {
2381     if (cond->type() == Item::FUNC_ITEM &&
2382 	(((Item_func*) cond)->functype() == Item_func::EQ_FUNC ||
2383 	 ((Item_func*) cond)->functype() == Item_func::EQUAL_FUNC))
2384     {
2385       Item_func_eq *func=(Item_func_eq*) cond;
2386       Item **args= func->arguments();
2387       bool left_const= args[0]->const_item();
2388       bool right_const= args[1]->const_item();
2389       if (!(left_const && right_const) &&
2390           args[0]->result_type() == args[1]->result_type())
2391       {
2392 	if (right_const)
2393 	{
2394           resolve_const_item(thd, &args[1], args[0]);
2395 	  func->update_used_tables();
2396           change_cond_ref_to_const(thd, save_list, and_father, and_father,
2397                                    args[0], args[1]);
2398 	}
2399 	else if (left_const)
2400 	{
2401           resolve_const_item(thd, &args[0], args[1]);
2402 	  func->update_used_tables();
2403           change_cond_ref_to_const(thd, save_list, and_father, and_father,
2404                                    args[1], args[0]);
2405 	}
2406       }
2407     }
2408   }
2409 }
2410 
2411 
2412 /**
2413   Simplify joins replacing outer joins by inner joins whenever it's
2414   possible.
2415 
2416     The function, during a retrieval of join_list,  eliminates those
2417     outer joins that can be converted into inner join, possibly nested.
2418     It also moves the join conditions for the converted outer joins
2419     and from inner joins to conds.
2420     The function also calculates some attributes for nested joins:
2421     - used_tables
2422     - not_null_tables
2423     - dep_tables.
2424     - on_expr_dep_tables
2425     The first two attributes are used to test whether an outer join can
2426     be replaced by an inner join. The third attribute represents the
2427     relation 'to be dependent on' for tables. If table t2 is dependent
2428     on table t1, then in any evaluated execution plan table access to
2429     table t1 must precede access to table t2. This relation is also used
2430     to check whether the query contains invalid cross-references.
2431     The fourth attribute is an auxiliary one and is used to calculate
2432     dep_tables.
2433     As the attribute dep_tables qualifies possible orders of tables in the
2434     execution plan, the dependencies required by the straight join
2435     modifiers are reflected in this attribute as well.
2436     The function also removes all braces that can be removed from the join
2437     expression without changing its meaning.
2438 
2439   @note
2440     An outer join can be replaced by an inner join if the where condition
2441     or the join condition for an embedding nested join contains a conjunctive
2442     predicate rejecting null values for some attribute of the inner tables.
2443 
2444     E.g. in the query:
2445     @code
2446       SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a WHERE t2.b < 5
2447     @endcode
2448     the predicate t2.b < 5 rejects nulls.
2449     The query is converted first to:
2450     @code
2451       SELECT * FROM t1 INNER JOIN t2 ON t2.a=t1.a WHERE t2.b < 5
2452     @endcode
2453     then to the equivalent form:
2454     @code
2455       SELECT * FROM t1, t2 WHERE t2.b < 5 AND t2.a=t1.a
2456     @endcode
2457 
2458 
2459     Similarly the following query:
2460     @code
2461       SELECT * from t1 LEFT JOIN (t2, t3) ON t2.a=t1.a AND t3.b=t1.b
2462         WHERE t2.c < 5
2463     @endcode
2464     is converted to:
2465     @code
2466       SELECT * FROM t1, (t2, t3) WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b
2467 
2468     @endcode
2469 
2470     One conversion might trigger another:
2471     @code
2472       SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a
2473                        LEFT JOIN t3 ON t3.b=t2.b
2474         WHERE t3 IS NOT NULL =>
2475       SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a, t3
2476         WHERE t3 IS NOT NULL AND t3.b=t2.b =>
2477       SELECT * FROM t1, t2, t3
2478         WHERE t3 IS NOT NULL AND t3.b=t2.b AND t2.a=t1.a
2479   @endcode
2480 
2481     The function removes all unnecessary braces from the expression
2482     produced by the conversions.
2483     E.g.
2484     @code
2485       SELECT * FROM t1, (t2, t3) WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b
2486     @endcode
2487     finally is converted to:
2488     @code
2489       SELECT * FROM t1, t2, t3 WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b
2490 
2491     @endcode
2492 
2493 
2494     It also will remove braces from the following queries:
2495     @code
2496       SELECT * from (t1 LEFT JOIN t2 ON t2.a=t1.a) LEFT JOIN t3 ON t3.b=t2.b
2497       SELECT * from (t1, (t2,t3)) WHERE t1.a=t2.a AND t2.b=t3.b.
2498     @endcode
2499 
2500     The benefit of this simplification procedure is that it might return
2501     a query for which the optimizer can evaluate execution plan with more
2502     join orders. With a left join operation the optimizer does not
2503     consider any plan where one of the inner tables is before some of outer
2504     tables.
2505 
2506   IMPLEMENTATION
2507     The function is implemented by a recursive procedure.  On the recursive
2508     ascent all attributes are calculated, all outer joins that can be
2509     converted are replaced and then all unnecessary braces are removed.
2510     As join list contains join tables in the reverse order sequential
2511     elimination of outer joins does not require extra recursive calls.
2512 
2513   SEMI-JOIN NOTES
2514     Remove all semi-joins that are within another semi-join (i.e. have
2515     an "ancestor" semi-join nest)
2516 
2517   EXAMPLES
2518     Here is an example of a join query with invalid cross references:
2519     @code
2520       SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t3.a LEFT JOIN t3 ON t3.b=t1.b
2521     @endcode
2522 
2523   @param join        reference to the query info
2524   @param join_list   list representation of the join to be converted
2525   @param conds       condition that join condition for converted outer joins
2526                      is added to
2527   @param top         true <=> conds is the where condition
2528   @param in_sj       TRUE <=> processing semi-join nest's children
2529   @param[out] new_conds New condition
2530   @param changelog   Don't specify this parameter, it is reserved for
2531                      recursive calls inside this function
2532 
2533   @returns true for error, false for success
2534 */
2535 
2536 static bool
simplify_joins(JOIN * join,List<TABLE_LIST> * join_list,Item * conds,bool top,bool in_sj,Item ** new_conds,uint * changelog)2537 simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, Item *conds, bool top,
2538                bool in_sj, Item **new_conds, uint *changelog)
2539 {
2540 
2541   /*
2542     Each type of change done by this function, or its recursive calls, is
2543     tracked in a bitmap:
2544   */
2545   enum change
2546   {
2547     NONE= 0,
2548     OUTER_JOIN_TO_INNER= 1 << 0,
2549     JOIN_COND_TO_WHERE= 1 << 1,
2550     PAREN_REMOVAL= 1 << 2,
2551     SEMIJOIN= 1 << 3
2552   };
2553   uint changes= 0; // To keep track of changes.
2554   if (changelog == NULL) // This is the top call.
2555     changelog= &changes;
2556 
2557   TABLE_LIST *table;
2558   NESTED_JOIN *nested_join;
2559   TABLE_LIST *prev_table= 0;
2560   List_iterator<TABLE_LIST> li(*join_list);
2561   bool straight_join= MY_TEST(join->select_options & SELECT_STRAIGHT_JOIN);
2562   DBUG_ENTER("simplify_joins");
2563 
2564   /*
2565     Try to simplify join operations from join_list.
2566     The most outer join operation is checked for conversion first.
2567   */
2568   while ((table= li++))
2569   {
2570     table_map used_tables;
2571     table_map not_null_tables= (table_map) 0;
2572 
2573     if ((nested_join= table->nested_join))
2574     {
2575       /*
2576          If the element of join_list is a nested join apply
2577          the procedure to its nested join list first.
2578       */
2579       if (table->join_cond())
2580       {
2581         Item *join_cond= table->join_cond();
2582         /*
2583            If a join condition JC is attached to the table,
2584            check all null rejected predicates in this condition.
2585            If such a predicate over an attribute belonging to
2586            an inner table of an embedded outer join is found,
2587            the outer join is converted to an inner join and
2588            the corresponding join condition is added to JC.
2589 	*/
2590         if (simplify_joins(join, &nested_join->join_list,
2591                            join_cond, false, in_sj || table->sj_on_expr,
2592                            &join_cond, changelog))
2593           DBUG_RETURN(true);
2594 
2595         if (join_cond != table->join_cond())
2596         {
2597           DBUG_ASSERT(join_cond);
2598 
2599           table->set_join_cond(join_cond);
2600         }
2601       }
2602       nested_join->used_tables= (table_map) 0;
2603       nested_join->not_null_tables=(table_map) 0;
2604       if (simplify_joins(join, &nested_join->join_list, conds, top,
2605                          in_sj || table->sj_on_expr, &conds, changelog))
2606         DBUG_RETURN(true);
2607       used_tables= nested_join->used_tables;
2608       not_null_tables= nested_join->not_null_tables;
2609     }
2610     else
2611     {
2612       used_tables= table->table->map;
2613       if (conds)
2614         not_null_tables= conds->not_null_tables();
2615     }
2616 
2617     if (table->embedding)
2618     {
2619       table->embedding->nested_join->used_tables|= used_tables;
2620       table->embedding->nested_join->not_null_tables|= not_null_tables;
2621     }
2622 
2623     if (!table->outer_join || (used_tables & not_null_tables))
2624     {
2625       /*
2626         For some of the inner tables there are conjunctive predicates
2627         that reject nulls => the outer join can be replaced by an inner join.
2628       */
2629       if (table->outer_join)
2630       {
2631         *changelog|= OUTER_JOIN_TO_INNER;
2632         table->outer_join= 0;
2633       }
2634       if (table->join_cond())
2635       {
2636         *changelog|= JOIN_COND_TO_WHERE;
2637         /* Add join condition to the WHERE or upper-level join condition. */
2638         if (conds)
2639         {
2640           Item_cond_and *new_cond=
2641             static_cast<Item_cond_and*>(and_conds(conds, table->join_cond()));
2642           if (!new_cond)
2643             DBUG_RETURN(true);
2644           conds= new_cond;
2645           conds->top_level_item();
2646           /*
2647             conds is always a new item as both the upper-level condition and a
2648             join condition existed
2649           */
2650           DBUG_ASSERT(!conds->fixed);
2651           if (conds->fix_fields(join->thd, &conds))
2652             DBUG_RETURN(true);
2653 
2654           /* If join condition has a pending rollback in THD::change_list */
2655           List_iterator<Item> lit(*new_cond->argument_list());
2656           Item *arg;
2657           while ((arg= lit++))
2658           {
2659             /*
2660               The join condition isn't necessarily the second argument anymore,
2661               since fix_fields may have merged it into an existing AND expr.
2662             */
2663             if (arg == table->join_cond())
2664               join->thd->
2665                 change_item_tree_place(table->join_cond_ref(), lit.ref());
2666           }
2667         }
2668         else
2669         {
2670           conds= table->join_cond();
2671           /* If join condition has a pending rollback in THD::change_list */
2672           join->thd->change_item_tree_place(table->join_cond_ref(), &conds);
2673         }
2674         table->set_join_cond(NULL);
2675       }
2676     }
2677 
2678     if (!top)
2679       continue;
2680 
2681     /*
2682       Only inner tables of non-convertible outer joins remain with
2683       the join condition.
2684     */
2685     if (table->join_cond())
2686     {
2687       table->dep_tables|= table->join_cond()->used_tables();
2688       if (table->embedding)
2689       {
2690         table->dep_tables&= ~table->embedding->nested_join->used_tables;
2691 
2692         // Embedding table depends on tables used in embedded join conditions.
2693         table->embedding->on_expr_dep_tables|=
2694           table->join_cond()->used_tables();
2695       }
2696       else
2697         table->dep_tables&= ~table->table->map;
2698     }
2699 
2700     if (prev_table)
2701     {
2702       /* The order of tables is reverse: prev_table follows table */
2703       if (prev_table->straight || straight_join)
2704         prev_table->dep_tables|= used_tables;
2705       if (prev_table->join_cond())
2706       {
2707         prev_table->dep_tables|= table->on_expr_dep_tables;
2708         table_map prev_used_tables= prev_table->nested_join ?
2709 	                            prev_table->nested_join->used_tables :
2710 	                            prev_table->table->map;
2711         /*
2712           If join condition contains only references to inner tables
2713           we still make the inner tables dependent on the outer tables.
2714           It would be enough to set dependency only on one outer table
2715           for them. Yet this is really a rare case.
2716           Note:
2717           RAND_TABLE_BIT mask should not be counted as it
2718           prevents update of inner table dependences.
2719           For example it might happen if RAND() function
2720           is used in JOIN ON clause.
2721 	*/
2722         if (!((prev_table->join_cond()->used_tables() & ~RAND_TABLE_BIT) &
2723               ~prev_used_tables))
2724           prev_table->dep_tables|= used_tables;
2725       }
2726     }
2727     prev_table= table;
2728   }
2729 
2730   /*
2731     Flatten nested joins that can be flattened.
2732     no join condition and not a semi-join => can be flattened.
2733   */
2734   li.rewind();
2735   while ((table= li++))
2736   {
2737     nested_join= table->nested_join;
2738     if (table->sj_on_expr && !in_sj)
2739     {
2740        /*
2741          If this is a semi-join that is not contained within another semi-join,
2742          leave it intact (otherwise it is flattened)
2743        */
2744       *changelog|= SEMIJOIN;
2745     }
2746     else if (nested_join && !table->join_cond())
2747     {
2748       *changelog|= PAREN_REMOVAL;
2749       TABLE_LIST *tbl;
2750       List_iterator<TABLE_LIST> it(nested_join->join_list);
2751       while ((tbl= it++))
2752       {
2753         tbl->embedding= table->embedding;
2754         tbl->join_list= table->join_list;
2755         tbl->dep_tables|= table->dep_tables;
2756       }
2757       li.replace(nested_join->join_list);
2758     }
2759   }
2760   *new_conds= conds;
2761 
2762   if (changes)
2763   {
2764     Opt_trace_context * trace= &join->thd->opt_trace;
2765     if (unlikely(trace->is_started()))
2766     {
2767       Opt_trace_object trace_wrapper(trace);
2768       Opt_trace_object trace_object(trace, "transformations_to_nested_joins");
2769       {
2770         Opt_trace_array trace_changes(trace, "transformations");
2771         if (changes & SEMIJOIN)
2772           trace_changes.add_alnum("semijoin");
2773         if (changes & OUTER_JOIN_TO_INNER)
2774           trace_changes.add_alnum("outer_join_to_inner_join");
2775         if (changes & JOIN_COND_TO_WHERE)
2776           trace_changes.add_alnum("JOIN_condition_to_WHERE");
2777         if (changes & PAREN_REMOVAL)
2778           trace_changes.add_alnum("parenthesis_removal");
2779       }
2780       // the newly transformed query is worth printing
2781       opt_trace_print_expanded_query(join->thd, join->select_lex,
2782                                      &trace_object);
2783     }
2784   }
2785   DBUG_RETURN(false);
2786 }
2787 
2788 
2789 /**
2790   Record join nest info in the select block.
2791 
2792   After simplification of inner join, outer join and semi-join structures:
2793    - record the remaining semi-join structures in the enclosing query block.
2794    - record transformed join conditions in TABLE_LIST objects.
2795 
2796   This function is called recursively for each join nest and/or table
2797   in the query block.
2798 
2799   @param select The query block
2800   @param tables List of tables and join nests
2801 
2802   @return False if successful, True if failure
2803 */
2804 
record_join_nest_info(st_select_lex * select,List<TABLE_LIST> * tables)2805 static bool record_join_nest_info(st_select_lex *select,
2806                                   List<TABLE_LIST> *tables)
2807 
2808 {
2809   TABLE_LIST *table;
2810   List_iterator<TABLE_LIST> li(*tables);
2811   DBUG_ENTER("record_join_nest_info");
2812 
2813   while ((table= li++))
2814   {
2815     table->prep_join_cond= table->join_cond() ?
2816       table->join_cond()->copy_andor_structure(select->join->thd, true) : NULL;
2817 
2818     if (table->nested_join == NULL)
2819       continue;
2820 
2821     if (record_join_nest_info(select, &table->nested_join->join_list))
2822       DBUG_RETURN(true);
2823     /*
2824       sj_inner_tables is set properly later in pull_out_semijoin_tables().
2825       This assignment is required in case pull_out_semijoin_tables()
2826       is not called.
2827     */
2828     if (table->sj_on_expr)
2829       table->sj_inner_tables= table->nested_join->used_tables;
2830     if (table->sj_on_expr && select->sj_nests.push_back(table))
2831       DBUG_RETURN(true);
2832   }
2833   DBUG_RETURN(false);
2834 }
2835 
2836 
2837 /**
2838   Assign each nested join structure a bit in nested_join_map.
2839 
2840   @param join_list     List of tables
2841   @param first_unused  Number of first unused bit in nested_join_map before the
2842                        call
2843 
2844   @note
2845     This function is called after simplify_joins(), when there are no
2846     redundant nested joins.
2847     We cannot have more nested joins in a query block than there are tables,
2848     so as long as the number of bits in nested_join_map is not less than the
2849     maximum number of tables in a query block, nested_join_map can never
2850     overflow.
2851 
2852   @return
2853     First unused bit in nested_join_map after the call.
2854 */
2855 
build_bitmap_for_nested_joins(List<TABLE_LIST> * join_list,uint first_unused)2856 static uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list,
2857                                           uint first_unused)
2858 {
2859   List_iterator<TABLE_LIST> li(*join_list);
2860   TABLE_LIST *table;
2861   DBUG_ENTER("build_bitmap_for_nested_joins");
2862   while ((table= li++))
2863   {
2864     NESTED_JOIN *nested_join;
2865     if ((nested_join= table->nested_join))
2866     {
2867       // We should have either a join condition or a semi-join condition
2868       DBUG_ASSERT((table->join_cond() == NULL) == (table->sj_on_expr != NULL));
2869 
2870       nested_join->nj_map= 0;
2871       nested_join->nj_total= 0;
2872       /*
2873         We only record nested join information for outer join nests.
2874         Tables belonging in semi-join nests are recorded in the
2875         embedding outer join nest, if one exists.
2876       */
2877       if (table->join_cond())
2878       {
2879         DBUG_ASSERT(first_unused < sizeof(nested_join_map)*8);
2880         nested_join->nj_map= (nested_join_map) 1 << first_unused++;
2881         nested_join->nj_total= nested_join->join_list.elements;
2882       }
2883       else if (table->sj_on_expr)
2884       {
2885         NESTED_JOIN *const outer_nest=
2886           table->embedding ? table->embedding->nested_join : NULL;
2887         /*
2888           The semi-join nest has already been counted into the table count
2889           for the outer join nest as one table, so subtract 1 from the
2890           table count.
2891         */
2892         if (outer_nest)
2893           outer_nest->nj_total+= (nested_join->join_list.elements - 1);
2894       }
2895       else
2896         DBUG_ASSERT(false);
2897 
2898       first_unused= build_bitmap_for_nested_joins(&nested_join->join_list,
2899                                                   first_unused);
2900     }
2901   }
2902   DBUG_RETURN(first_unused);
2903 }
2904 
2905 
2906 /** Update the dependency map for the tables. */
2907 
update_depend_map(JOIN * join)2908 void update_depend_map(JOIN *join)
2909 {
2910   for (uint tableno = 0; tableno < join->tables; tableno++)
2911   {
2912     JOIN_TAB *const join_tab= join->join_tab + tableno;
2913     TABLE_REF *const ref= &join_tab->ref;
2914     table_map depend_map=0;
2915     Item **item=ref->items;
2916     uint i;
2917     for (i=0 ; i < ref->key_parts ; i++,item++)
2918       depend_map|=(*item)->used_tables();
2919     depend_map&= ~PSEUDO_TABLE_BITS;
2920     ref->depend_map= depend_map;
2921     for (JOIN_TAB **tab=join->map2table;
2922 	 depend_map ;
2923 	 tab++,depend_map>>=1 )
2924     {
2925       if (depend_map & 1)
2926 	ref->depend_map|=(*tab)->ref.depend_map;
2927     }
2928   }
2929 }
2930 
2931 
2932 /** Update the dependency map for the sort order. */
2933 
update_depend_map(JOIN * join,ORDER * order)2934 static void update_depend_map(JOIN *join, ORDER *order)
2935 {
2936   for (; order ; order=order->next)
2937   {
2938     table_map depend_map;
2939     order->item[0]->update_used_tables();
2940     order->depend_map= depend_map=
2941       order->item[0]->used_tables() & ~PARAM_TABLE_BIT;
2942     order->used= 0;
2943     // Not item_sum(), RAND() and no reference to table outside of sub select
2944     if (!(order->depend_map & (OUTER_REF_TABLE_BIT | RAND_TABLE_BIT))
2945         && !order->item[0]->with_sum_func)
2946     {
2947       for (JOIN_TAB **tab=join->map2table;
2948 	   depend_map ;
2949 	   tab++, depend_map>>=1)
2950       {
2951 	if (depend_map & 1)
2952 	  order->depend_map|=(*tab)->ref.depend_map;
2953       }
2954     }
2955   }
2956 }
2957 
2958 
2959 /**
2960   Update equalities and keyuse references after semi-join materialization
2961   strategy is chosen.
2962 
2963   @details
2964     For each multiple equality that contains a field that is selected
2965     from a subquery, and that subquery is executed using a semi-join
2966     materialization strategy, add the corresponding column in the materialized
2967     temporary table to the equality.
2968     For each injected semi-join equality that is not converted to
2969     multiple equality, replace the reference to the expression selected
2970     from the subquery with the corresponding column in the temporary table.
2971 
2972     This is needed to properly reflect the equalities that involve injected
2973     semi-join equalities when materialization strategy is chosen.
2974     @see eliminate_item_equal() for how these equalities are used to generate
2975     correct equality predicates.
2976 
2977     The MaterializeScan semi-join strategy requires some additional processing:
2978     All primary tables after the materialized temporary table must be inspected
2979     for keyuse objects that point to expressions from the subquery tables.
2980     These references must be replaced with references to corresponding columns
2981     in the materialized temporary table instead. Those primary tables using
2982     ref access will thus be made to depend on the materialized temporary table
2983     instead of the subquery tables.
2984 
2985     Only the injected semi-join equalities need this treatment, other predicates
2986     will be handled correctly by the regular item substitution process.
2987 
2988   @return False if success, true if error
2989 */
2990 
update_equalities_for_sjm()2991 bool JOIN::update_equalities_for_sjm()
2992 {
2993   List_iterator<Semijoin_mat_exec> it(sjm_exec_list);
2994   Semijoin_mat_exec *sjm_exec;
2995   while ((sjm_exec= it++))
2996   {
2997     TABLE_LIST *const sj_nest= sjm_exec->sj_nest;
2998 
2999     DBUG_ASSERT(!sj_nest->outer_join_nest());
3000     /*
3001       A materialized semi-join nest cannot actually be an inner part of an
3002       outer join yet, this is just a preparatory step,
3003       ie sj_nest->outer_join_nest() is always NULL here.
3004       @todo: Enable outer joining here later.
3005     */
3006     Item *cond= sj_nest->outer_join_nest() ?
3007                   sj_nest->outer_join_nest()->join_cond() :
3008                   conds;
3009     if (!cond)
3010       continue;
3011 
3012     uchar *dummy= NULL;
3013     cond= cond->compile(&Item::equality_substitution_analyzer, &dummy,
3014                         &Item::equality_substitution_transformer,
3015                         (uchar *)sj_nest);
3016     if (cond == NULL)
3017       return true;
3018 
3019     cond->update_used_tables();
3020 
3021     // Loop over all primary tables that follow the materialized table
3022     for (uint j= sjm_exec->mat_table_index + 1; j < primary_tables; j++)
3023     {
3024       JOIN_TAB *const tab= join_tab + j;
3025       for (Key_use *keyuse= tab->position->key;
3026            keyuse && keyuse->table == tab->table &&
3027            keyuse->key == tab->position->key->key;
3028            keyuse++)
3029       {
3030         List_iterator<Item> it(sj_nest->nested_join->sj_inner_exprs);
3031         Item *old;
3032         uint fieldno= 0;
3033         while ((old= it++))
3034         {
3035           if (old->real_item()->eq(keyuse->val->real_item(), false))
3036           {
3037             /*
3038               Replace the expression selected from the subquery with the
3039               corresponding column of the materialized temporary table.
3040             */
3041             keyuse->val= sj_nest->nested_join->sjm.mat_fields[fieldno];
3042             keyuse->used_tables= keyuse->val->used_tables();
3043             break;
3044           }
3045           fieldno++;
3046         }
3047       }
3048     }
3049   }
3050 
3051   return false;
3052 }
3053 
3054 
3055 /**
3056   Assign set of available (prefix) tables to all tables in query block.
3057   Also set added tables, ie the tables added in each JOIN_TAB compared to the
3058   previous JOIN_TAB.
3059   This function must be called for every query block after the table order
3060   has been determined.
3061 */
3062 
set_prefix_tables()3063 void JOIN::set_prefix_tables()
3064 {
3065   DBUG_ASSERT(!plan_is_const());
3066   /*
3067     The const tables are available together with the first non-const table in
3068     the join order.
3069   */
3070   table_map const initial_tables_map= const_table_map |
3071     (allow_outer_refs ? OUTER_REF_TABLE_BIT : 0);
3072 
3073   table_map current_tables_map= initial_tables_map;
3074   table_map prev_tables_map= (table_map) 0;
3075   table_map saved_tables_map= (table_map) 0;
3076 
3077   JOIN_TAB *last_non_sjm_tab= NULL; // Track the last non-sjm table
3078 
3079   for (uint i= const_tables; i < tables; i++)
3080   {
3081     JOIN_TAB *const tab= join_tab + i;
3082     if (!tab->table)
3083       continue;
3084     /*
3085       Tables that are within SJ-Materialization nests cannot have their
3086       conditions referring to preceding non-const tables.
3087        - If we're looking at the first SJM table, reset current_tables_map
3088          to refer to only allowed tables
3089       @see Item_equal::get_subst_item()
3090       @see eliminate_item_equal()
3091     */
3092     if (sj_is_materialize_strategy(tab->get_sj_strategy()))
3093     {
3094       const table_map sjm_inner_tables= tab->emb_sj_nest->sj_inner_tables;
3095       if (!(sjm_inner_tables & current_tables_map))
3096       {
3097         saved_tables_map= current_tables_map;
3098         current_tables_map= initial_tables_map;
3099         prev_tables_map= (table_map) 0;
3100       }
3101 
3102       current_tables_map|= tab->table->map;
3103       tab->set_prefix_tables(current_tables_map, prev_tables_map);
3104       prev_tables_map= current_tables_map;
3105 
3106       if (!(sjm_inner_tables & ~current_tables_map))
3107       {
3108         // At the end of a semi-join materialization nest, restore previous map
3109         current_tables_map= saved_tables_map;
3110         prev_tables_map= last_non_sjm_tab ?
3111                          last_non_sjm_tab->prefix_tables() : (table_map) 0;
3112       }
3113     }
3114     else
3115     {
3116       last_non_sjm_tab= tab;
3117       current_tables_map|= tab->table->map;
3118       tab->set_prefix_tables(current_tables_map, prev_tables_map);
3119       prev_tables_map= current_tables_map;
3120     }
3121   }
3122   /*
3123     Random expressions must be added to the last table's condition.
3124     It solves problem with queries like SELECT * FROM t1 WHERE rand() > 0.5
3125   */
3126   if (last_non_sjm_tab != NULL)
3127     last_non_sjm_tab->add_prefix_tables(RAND_TABLE_BIT);
3128 }
3129 
3130 
3131 /**
3132   Calculate best possible join order and initialize the join structure.
3133 
3134   @param  join          Join object that is populated with statistics data
3135   @param  tables_arg    List of tables that is referenced by this query
3136   @param  conds         Where condition of query
3137   @param[out] keyuse_array  Populated with key_use information
3138   @param  first_optimization True if first optimization of this query
3139 
3140   @return false if success, true if error
3141 
3142   @details
3143   Here is an overview of the logic of this function:
3144 
3145   - Initialize JOIN data structures and setup basic dependencies between tables.
3146 
3147   - Update dependencies based on join information.
3148 
3149   - Make key descriptions (update_ref_and_keys()).
3150 
3151   - Pull out semi-join tables based on table dependencies.
3152 
3153   - Extract tables with zero or one rows as const tables.
3154 
3155   - Read contents of const tables, substitute columns from these tables with
3156     actual data. Also keep track of empty tables vs. one-row tables.
3157 
3158   - After const table extraction based on row count, more tables may
3159     have become functionally dependent. Extract these as const tables.
3160 
3161   - Add new sargable predicates based on retrieved const values.
3162 
3163   - Calculate number of rows to be retrieved from each table.
3164 
3165   - Calculate cost of potential semi-join materializations.
3166 
3167   - Calculate best possible join order based on available statistics.
3168 
3169   - Fill in remaining information for the generated join order.
3170 */
3171 
3172 static bool
make_join_statistics(JOIN * join,TABLE_LIST * tables_arg,Item * conds,Key_use_array * keyuse_array,bool first_optimization)3173 make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, Item *conds,
3174                      Key_use_array *keyuse_array, bool first_optimization)
3175 {
3176   int error;
3177   THD *const thd= join->thd;
3178   TABLE_LIST *tables= tables_arg;
3179   uint i,const_count,key;
3180   const uint table_count= join->tables;
3181   table_map found_ref, refs;
3182   JOIN_TAB *stat,*stat_end,*s,**stat_ref;
3183   Key_use *keyuse, *start_keyuse;
3184   table_map outer_join= 0;
3185   SARGABLE_PARAM *sargables= 0;
3186   JOIN_TAB *stat_vector[MAX_TABLES+1];
3187   Opt_trace_context * const trace= &join->thd->opt_trace;
3188   DBUG_ENTER("make_join_statistics");
3189 
3190   stat= new (thd->mem_root) JOIN_TAB[table_count];
3191   stat_ref= (JOIN_TAB**) thd->alloc(sizeof(JOIN_TAB*)*MAX_TABLES);
3192   if (!stat || !stat_ref)
3193     DBUG_RETURN(true);
3194 
3195   if (!(join->positions=
3196         new (thd->mem_root) POSITION[table_count+1]))
3197     DBUG_RETURN(true);
3198 
3199   // Up to one extra slot per semi-join nest is needed (if materialized)
3200   uint sj_nests= join->select_lex->sj_nests.elements;
3201   if (!(join->best_positions=
3202       new (thd->mem_root) POSITION[table_count + sj_nests + 1]))
3203     DBUG_RETURN(true);
3204 
3205   join->best_ref= stat_vector;
3206 
3207   stat_end= stat+table_count;
3208   join->const_table_map= 0;
3209   join->found_const_table_map= 0;
3210   join->all_table_map= 0;
3211   const_count= 0;
3212 
3213   /*
3214     Initialize data structures for tables to be joined.
3215     Initialize dependencies between tables.
3216   */
3217   for (s= stat, i= 0;
3218        tables;
3219        s++, tables= tables->next_leaf, i++)
3220   {
3221     stat_vector[i]=s;
3222     TABLE *const table= tables->table;
3223     s->table= table;
3224     table->pos_in_table_list= tables;
3225     error= tables->fetch_number_of_rows();
3226 
3227     DBUG_EXECUTE_IF("bug11747970_raise_error",
3228                     {
3229                       if (!error)
3230                       {
3231                         my_error(ER_UNKNOWN_ERROR, MYF(0));
3232                         goto error;
3233                       }
3234                     });
3235 
3236     if (error)
3237     {
3238       table->file->print_error(error, MYF(0));
3239       goto error;
3240     }
3241     table->quick_keys.clear_all();
3242     table->possible_quick_keys.clear_all();
3243     table->reginfo.join_tab=s;
3244     table->reginfo.not_exists_optimize=0;
3245     memset(table->const_key_parts, 0, sizeof(key_part_map)*table->s->keys);
3246     join->all_table_map|= table->map;
3247     s->join=join;
3248 
3249     s->dependent= tables->dep_tables;
3250     if (tables->schema_table)
3251       table->file->stats.records= 2;
3252     table->quick_condition_rows= table->file->stats.records;
3253 
3254     s->on_expr_ref= tables->join_cond_ref();
3255 
3256     if (tables->outer_join_nest())
3257     {
3258       /* s belongs to a nested join, maybe to several embedding joins */
3259       s->embedding_map= 0;
3260       for (TABLE_LIST *embedding= tables->embedding;
3261            embedding;
3262            embedding= embedding->embedding)
3263       {
3264         NESTED_JOIN *nested_join= embedding->nested_join;
3265         s->embedding_map|=nested_join->nj_map;
3266         s->dependent|= embedding->dep_tables;
3267         outer_join|= nested_join->used_tables;
3268       }
3269     }
3270     else if (*s->on_expr_ref)
3271     {
3272       /* s is the only inner table of an outer join */
3273       outer_join|= table->map;
3274       s->embedding_map= 0;
3275       for (TABLE_LIST *embedding= tables->embedding;
3276            embedding;
3277            embedding= embedding->embedding)
3278         s->embedding_map|= embedding->nested_join->nj_map;
3279     }
3280   }
3281   stat_vector[i]=0;
3282   join->outer_join=outer_join;
3283 
3284   if (join->outer_join)
3285   {
3286     /*
3287        Complete the dependency analysis.
3288        Build transitive closure for relation 'to be dependent on'.
3289        This will speed up the plan search for many cases with outer joins,
3290        as well as allow us to catch illegal cross references.
3291        Warshall's algorithm is used to build the transitive closure.
3292        As we may restart the outer loop upto 'table_count' times, the
3293        complexity of the algorithm is O((number of tables)^3).
3294        However, most of the iterations will be shortcircuited when
3295        there are no pedendencies to propogate.
3296     */
3297     for (i= 0 ; i < table_count ; i++)
3298     {
3299       TABLE *const table= stat[i].table;
3300 
3301       if (!table->reginfo.join_tab->dependent)
3302         continue;
3303 
3304       uint j;
3305       /* Add my dependencies to other tables depending on me */
3306       for (j= 0, s= stat ; j < table_count ; j++, s++)
3307       {
3308         if (s->dependent & table->map)
3309         {
3310           table_map was_dependent= s->dependent;
3311           s->dependent |= table->reginfo.join_tab->dependent;
3312           /*
3313             If we change dependencies for a table we already have
3314             processed: Redo dependency propagation from this table.
3315           */
3316           if (i > j && s->dependent != was_dependent)
3317           {
3318             i = j-1;
3319             break;
3320           }
3321         }
3322       }
3323     }
3324 
3325     for (i= 0, s= stat ; i < table_count ; i++, s++)
3326     {
3327       /* Catch illegal cross references for outer joins */
3328       if (s->dependent & s->table->map)
3329       {
3330         join->tables=0;			// Don't use join->table
3331         join->primary_tables= 0;
3332         my_message(ER_WRONG_OUTER_JOIN, ER(ER_WRONG_OUTER_JOIN), MYF(0));
3333         goto error;
3334       }
3335 
3336       if (outer_join & s->table->map)
3337         s->table->maybe_null= 1;
3338       s->key_dependent= s->dependent;
3339     }
3340   }
3341 
3342   if (unlikely(trace->is_started()))
3343     trace_table_dependencies(trace, stat, table_count);
3344 
3345   if (conds || outer_join)
3346     if (update_ref_and_keys(thd, keyuse_array, stat, join->tables,
3347                             conds, join->cond_equal,
3348                             ~outer_join, join->select_lex, &sargables))
3349       goto error;
3350 
3351   /*
3352     Pull out semi-join tables based on dependencies. Dependencies are valid
3353     throughout the lifetime of a query, so this operation can be performed
3354     on the first optimization only.
3355   */
3356   if (first_optimization && sj_nests)
3357   {
3358     if (pull_out_semijoin_tables(join))
3359       DBUG_RETURN(true);
3360     sj_nests= join->select_lex->sj_nests.elements;
3361   }
3362 
3363   /*
3364     Extract const tables based on row counts, must be done for each execution.
3365     Tables containing exactly zero or one rows are marked as const, but
3366     notice the additional constraints checked below.
3367     Tables that are extracted have their rows read before actual execution
3368     starts and are placed in the beginning of the join_tab array.
3369     Thus, they do not take part in join order optimization process,
3370     which can significantly reduce the optimization time.
3371     The data read from these tables can also be regarded as "constant"
3372     throughout query execution, hence the column values can be used for
3373     additional constant propagation and extraction of const tables based
3374     on eq-ref properties.
3375   */
3376   enum enum_const_table_extraction
3377   {
3378      extract_no_table=    0,
3379      extract_empty_table= 1,
3380      extract_const_table= 2
3381   };
3382 
3383   if (join->no_const_tables)
3384     goto const_table_extraction_done;
3385 
3386   for (i= 0, s= stat; i < table_count; i++, s++)
3387   {
3388     TABLE      *const table= s->table;
3389     TABLE_LIST *const tables= table->pos_in_table_list;
3390     enum enum_const_table_extraction extract_method= extract_const_table;
3391 
3392 #ifdef WITH_PARTITION_STORAGE_ENGINE
3393     const bool all_partitions_pruned_away= table->all_partitions_pruned_away;
3394 #else
3395     const bool all_partitions_pruned_away= false;
3396 #endif
3397 
3398     if (tables->outer_join_nest())
3399     {
3400       /*
3401         Table belongs to a nested join, no candidate for const table extraction.
3402       */
3403       extract_method= extract_no_table;
3404     }
3405     else if (tables->embedding && tables->embedding->sj_on_expr)
3406     {
3407       /*
3408         Table belongs to a semi-join.
3409         We do not currently pull out const tables from semi-join nests.
3410       */
3411       extract_method= extract_no_table;
3412     }
3413     else if (*s->on_expr_ref)
3414     {
3415       /* s is the only inner table of an outer join, extract empty tables */
3416       extract_method= extract_empty_table;
3417     }
3418     switch (extract_method)
3419     {
3420     case extract_no_table:
3421       break;
3422 
3423     case extract_empty_table:
3424       /* Extract tables with zero rows, but only if statistics are exact */
3425       if ((table->file->stats.records == 0 ||
3426            all_partitions_pruned_away) &&
3427           (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT))
3428         set_position(join, const_count++, s, NULL);
3429       break;
3430 
3431     case extract_const_table:
3432       /*
3433         Extract tables with zero or one rows, but do not extract tables that
3434          1. are dependent upon other tables, or
3435          2. have no exact statistics, or
3436          3. are full-text searched
3437       */
3438       if ((table->s->system ||
3439            table->file->stats.records <= 1 ||
3440            all_partitions_pruned_away) &&
3441           !s->dependent &&                                               // 1
3442           (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) && // 2
3443           !table->fulltext_searched)                                     // 3
3444         set_position(join, const_count++, s, NULL);
3445       break;
3446     }
3447   }
3448   /* Read const tables (tables matching no more than 1 rows) */
3449 
3450   for (POSITION *p_pos=join->positions, *p_end=p_pos+const_count;
3451        p_pos < p_end ;
3452        p_pos++)
3453   {
3454     int tmp;
3455     s= p_pos->table;
3456     s->type=JT_SYSTEM;
3457     join->const_table_map|=s->table->map;
3458     if ((tmp=join_read_const_table(s, p_pos)))
3459     {
3460       if (tmp > 0)
3461 	goto error;		// Fatal error
3462     }
3463     else
3464     {
3465       join->found_const_table_map|= s->table->map;
3466       s->table->pos_in_table_list->optimized_away= TRUE;
3467     }
3468   }
3469 
3470 const_table_extraction_done:
3471   /* loop until no more const tables are found */
3472   int ref_changed;
3473   do
3474   {
3475   more_const_tables_found:
3476     ref_changed = 0;
3477     found_ref=0;
3478 
3479     /*
3480       We only have to loop from stat_vector + const_count as
3481       set_position() will move all const_tables first in stat_vector
3482     */
3483 
3484     for (JOIN_TAB **pos=stat_vector+const_count ; (s= *pos) ; pos++)
3485     {
3486       TABLE *const table= s->table;
3487       TABLE_LIST *const tl= table->pos_in_table_list;
3488       /*
3489         If equi-join condition by a key is null rejecting and after a
3490         substitution of a const table the key value happens to be null
3491         then we can state that there are no matches for this equi-join.
3492       */
3493       if ((keyuse= s->keyuse) && *s->on_expr_ref && !s->embedding_map)
3494       {
3495         /*
3496           When performing an outer join operation if there are no matching rows
3497           for the single row of the outer table all the inner tables are to be
3498           null complemented and thus considered as constant tables.
3499           Here we apply this consideration to the case of outer join operations
3500           with a single inner table only because the case with nested tables
3501           would require a more thorough analysis.
3502           TODO. Apply single row substitution to null complemented inner tables
3503           for nested outer join operations.
3504 	*/
3505         while (keyuse->table == table)
3506         {
3507           if (!(keyuse->val->used_tables() & ~join->const_table_map) &&
3508               keyuse->val->is_null() && keyuse->null_rejecting)
3509           {
3510             s->type= JT_CONST;
3511             mark_as_null_row(table);
3512             join->found_const_table_map|= table->map;
3513 	    join->const_table_map|= table->map;
3514 	    set_position(join, const_count++, s, NULL);
3515             goto more_const_tables_found;
3516            }
3517 	  keyuse++;
3518         }
3519       }
3520 
3521       if (s->dependent)				// If dependent on some table
3522       {
3523 	// All dep. must be constants
3524         if (s->dependent & ~(join->const_table_map))
3525 	  continue;
3526         /*
3527           Mark a dependent table as constant if
3528            1. it has exactly zero or one rows (it is a system table), and
3529            2. it is not within a nested outer join, and
3530            3. it does not have an expensive outer join condition.
3531               This is because we have to determine whether an outer-joined table
3532               has a real row or a null-extended row in the optimizer phase.
3533               We have no possibility to evaluate its join condition at
3534               execution time, when it is marked as a system table.
3535         */
3536 	if (table->file->stats.records <= 1L &&                            // 1
3537             (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) && // 1
3538             !tl->outer_join_nest() &&                                      // 2
3539             !(*s->on_expr_ref && (*s->on_expr_ref)->is_expensive()))       // 3
3540 	{					// system table
3541 	  int tmp= 0;
3542 	  s->type=JT_SYSTEM;
3543 	  join->const_table_map|=table->map;
3544 	  set_position(join, const_count++, s, NULL);
3545 	  if ((tmp= join_read_const_table(s, join->positions+const_count-1)))
3546 	  {
3547 	    if (tmp > 0)
3548 	      goto error;			// Fatal error
3549 	  }
3550 	  else
3551 	    join->found_const_table_map|= table->map;
3552 	  continue;
3553 	}
3554       }
3555       /* check if table can be read by key or table only uses const refs */
3556       if ((keyuse=s->keyuse))
3557       {
3558 	s->type= JT_REF;
3559 	while (keyuse->table == table)
3560 	{
3561 	  start_keyuse=keyuse;
3562 	  key=keyuse->key;
3563 	  s->keys.set_bit(key);               // QQ: remove this ?
3564 
3565 	  refs=0;
3566           key_map const_ref, eq_part;
3567 	  do
3568 	  {
3569 	    if (keyuse->val->type() != Item::NULL_ITEM && !keyuse->optimize)
3570 	    {
3571 	      if (!((~join->found_const_table_map) & keyuse->used_tables))
3572 		const_ref.set_bit(keyuse->keypart);
3573 	      else
3574 		refs|=keyuse->used_tables;
3575 	      eq_part.set_bit(keyuse->keypart);
3576 	    }
3577 	    keyuse++;
3578 	  } while (keyuse->table == table && keyuse->key == key);
3579 
3580           /*
3581             Extract const tables with proper key dependencies.
3582             Exclude tables that
3583              1. are full-text searched, or
3584              2. are part of nested outer join, or
3585              3. are part of semi-join, or
3586              4. have an expensive outer join condition.
3587              5. are blocked by handler for const table optimize.
3588           */
3589 	  if (eq_part.is_prefix(table->key_info[key].user_defined_key_parts) &&
3590               !table->fulltext_searched &&                           // 1
3591               !tl->outer_join_nest() &&                              // 2
3592               !(tl->embedding && tl->embedding->sj_on_expr) &&       // 3
3593               !(*s->on_expr_ref && (*s->on_expr_ref)->is_expensive()) &&// 4
3594               !(table->file->ha_table_flags() & HA_BLOCK_CONST_TABLE))  // 5
3595 	  {
3596             if (table->key_info[key].flags & HA_NOSAME)
3597             {
3598 	      if (const_ref == eq_part)
3599 	      {					// Found everything for ref.
3600 	        int tmp;
3601 	        ref_changed = 1;
3602 	        s->type= JT_CONST;
3603 	        join->const_table_map|=table->map;
3604 	        set_position(join,const_count++,s,start_keyuse);
3605 	        if (create_ref_for_key(join, s, start_keyuse,
3606 				       join->found_const_table_map))
3607                   goto error;
3608 	        if ((tmp=join_read_const_table(s,
3609                                                join->positions+const_count-1)))
3610 	        {
3611 		  if (tmp > 0)
3612 		    goto error;			// Fatal error
3613 	        }
3614 	        else
3615 		  join->found_const_table_map|= table->map;
3616 	        break;
3617 	      }
3618 	      else
3619 	        found_ref|= refs;      // Table is const if all refs are const
3620 	    }
3621             else if (const_ref == eq_part)
3622               s->const_keys.set_bit(key);
3623           }
3624 	}
3625       }
3626     }
3627   } while (join->const_table_map & found_ref && ref_changed);
3628 
3629   /*
3630     Update info on indexes that can be used for search lookups as
3631     reading const tables may has added new sargable predicates.
3632   */
3633   if (const_count && sargables)
3634   {
3635     for( ; sargables->field ; sargables++)
3636     {
3637       Field *field= sargables->field;
3638       JOIN_TAB *join_tab= field->table->reginfo.join_tab;
3639       key_map possible_keys= field->key_start;
3640       possible_keys.intersect(field->table->keys_in_use_for_query);
3641       bool is_const= 1;
3642       for (uint j=0; j < sargables->num_values; j++)
3643         is_const&= sargables->arg_value[j]->const_item();
3644       if (is_const)
3645       {
3646         join_tab->const_keys.merge(possible_keys);
3647         join_tab->keys.merge(possible_keys);
3648       }
3649     }
3650   }
3651 
3652   {
3653     Opt_trace_object trace_wrapper(trace);
3654     /* Calc how many (possible) matched records in each table */
3655     Opt_trace_array trace_records(trace, "rows_estimation");
3656 
3657     for (s= stat ; s < stat_end ; s++)
3658     {
3659       Opt_trace_object trace_table(trace);
3660       trace_table.add_utf8_table(s->table);
3661       if (s->type == JT_SYSTEM || s->type == JT_CONST)
3662       {
3663         trace_table.add("rows", 1).add("cost", 1)
3664           .add_alnum("table_type", (s->type == JT_SYSTEM) ? "system": "const")
3665           .add("empty", static_cast<bool>(s->table->null_row));
3666 
3667         /* Only one matching row */
3668         s->found_records= s->records= s->read_time=1; s->worst_seeks= 1.0;
3669         continue;
3670       }
3671       /* Approximate found rows and time to read them */
3672       s->found_records= s->records= s->table->file->stats.records;
3673       s->read_time= (ha_rows) s->table->file->scan_time();
3674 
3675       /*
3676         Set a max range of how many seeks we can expect when using keys
3677         This is can't be to high as otherwise we are likely to use
3678         table scan.
3679       */
3680       s->worst_seeks= min((double) s->found_records / 10,
3681                           (double) s->read_time * 3);
3682       if (s->worst_seeks < 2.0)                 // Fix for small tables
3683         s->worst_seeks= 2.0;
3684 
3685       /*
3686         Add to stat->const_keys those indexes for which all group fields or
3687         all select distinct fields participate in one index.
3688       */
3689       add_group_and_distinct_keys(join, s);
3690 
3691       /*
3692         Perform range analysis if there are keys it could use (1).
3693         Don't do range analysis if on the inner side of an outer join (2).
3694         Do range analysis if on the inner side of a semi-join (3).
3695       */
3696       TABLE_LIST *const tl= s->table->pos_in_table_list;
3697       if (!s->const_keys.is_clear_all() &&                        // (1)
3698           (!tl->embedding ||                                      // (2)
3699            (tl->embedding && tl->embedding->sj_on_expr)))         // (3)
3700       {
3701         ha_rows records;
3702         SQL_SELECT *select;
3703         select= make_select(s->table, join->found_const_table_map,
3704                             join->found_const_table_map,
3705                             *s->on_expr_ref ? *s->on_expr_ref : conds,
3706                             1, &error);
3707         if (!select)
3708           goto error;
3709         records= get_quick_record_count(thd, select, s->table,
3710                                         &s->const_keys, join->row_limit);
3711 
3712         if (records == 0 && thd->is_fatal_error)
3713           DBUG_RETURN(true);
3714 
3715         s->quick= select->quick;
3716         s->needed_reg= select->needed_reg;
3717         select->quick= 0;
3718         /*
3719           Check for "impossible range", but make sure that we do not attempt
3720           to mark semi-joined tables as "const" (only semi-joined tables that
3721           are functionally dependent can be marked "const", and subsequently
3722           pulled out of their semi-join nests).
3723         */
3724         if (records == 0 &&
3725             s->table->reginfo.impossible_range &&
3726             (!(tl->embedding && tl->embedding->sj_on_expr)))
3727         {
3728           /*
3729             Impossible WHERE or ON expression
3730             In case of ON, we mark that the we match one empty NULL row.
3731             In case of WHERE, don't set found_const_table_map to get the
3732             caller to abort with a zero row result.
3733           */
3734           join->const_table_map|= s->table->map;
3735           set_position(join, const_count++, s, NULL);
3736           s->type= JT_CONST;
3737           if (*s->on_expr_ref)
3738           {
3739             /* Generate empty row */
3740             s->info= ET_IMPOSSIBLE_ON_CONDITION;
3741             trace_table.add("returning_empty_null_row", true).
3742               add_alnum("cause", "impossible_on_condition");
3743             join->found_const_table_map|= s->table->map;
3744             s->type= JT_CONST;
3745             mark_as_null_row(s->table);         // All fields are NULL
3746           }
3747           else
3748           {
3749             trace_table.add("rows", 0).
3750               add_alnum("cause", "impossible_where_condition");
3751           }
3752         }
3753         if (records != HA_POS_ERROR)
3754         {
3755           s->found_records= records;
3756           s->read_time= (ha_rows) (s->quick ? s->quick->read_time : 0.0);
3757         }
3758         delete select;
3759       }
3760       else
3761         Opt_trace_object(trace, "table_scan").
3762           add("rows", s->found_records).
3763           add("cost", s->read_time);
3764     }
3765   }
3766 
3767   join->join_tab=stat;
3768   join->map2table=stat_ref;
3769   join->const_tables=const_count;
3770 
3771   if (sj_nests)
3772     join->set_semijoin_embedding();
3773 
3774   if (!join->plan_is_const())
3775     optimize_keyuse(join, keyuse_array);
3776 
3777   join->allow_outer_refs= true;
3778 
3779   if (sj_nests && optimize_semijoin_nests_for_materialization(join))
3780     DBUG_RETURN(true);
3781 
3782   if (Optimize_table_order(thd, join, NULL).choose_table_order())
3783     DBUG_RETURN(true);
3784 
3785   DBUG_EXECUTE_IF("bug13820776_1", thd->killed= THD::KILL_QUERY;);
3786   if (thd->killed || thd->is_error())
3787     DBUG_RETURN(true);
3788 
3789   if (join->unit->item && join->decide_subquery_strategy())
3790     DBUG_RETURN(true);
3791 
3792   join->refine_best_rowcount();
3793 
3794   // Only best_positions should be needed from now on.
3795   join->positions= NULL;
3796   join->best_ref= NULL;
3797 
3798   /*
3799     Store the cost of this query into a user variable
3800     Don't update last_query_cost for statements that are not "flat joins" :
3801     i.e. they have subqueries, unions or call stored procedures.
3802     TODO: calculate a correct cost for a query with subqueries and UNIONs.
3803   */
3804   if (thd->lex->is_single_level_stmt())
3805     thd->status_var.last_query_cost= join->best_read;
3806 
3807   /* Generate an execution plan from the found optimal join order. */
3808   if (join->get_best_combination())
3809     DBUG_RETURN(true);
3810 
3811   // No need for this struct after new JOIN_TAB array is set up.
3812   join->best_positions= NULL;
3813 
3814   /* Some called function may still set thd->is_fatal_error unnoticed */
3815   if (thd->is_fatal_error)
3816     DBUG_RETURN(true);
3817 
3818   DBUG_RETURN(false);
3819 
3820 error:
3821   /*
3822     Need to clean up join_tab from TABLEs in case of error.
3823     They won't get cleaned up by JOIN::cleanup() because JOIN::join_tab
3824     may not be assigned yet by this function (which is building join_tab).
3825     Dangling TABLE::reginfo.join_tab may cause part_of_refkey to choke.
3826   */
3827   for (tables= tables_arg; tables; tables= tables->next_leaf)
3828     tables->table->reginfo.join_tab= NULL;
3829   DBUG_RETURN(true);
3830 }
3831 
3832 
3833 /**
3834   Set semi-join embedding join nest pointers.
3835 
3836   Set pointer to embedding semi-join nest for all semi-joined tables.
3837   Note that this must be done for every table inside all semi-join nests,
3838   even for tables within outer join nests embedded in semi-join nests.
3839   A table can never be part of multiple semi-join nests, hence no
3840   ambiguities can ever occur.
3841   Note also that the pointer is not set for TABLE_LIST objects that
3842   are outer join nests within semi-join nests.
3843 */
3844 
set_semijoin_embedding()3845 void JOIN::set_semijoin_embedding()
3846 {
3847   DBUG_ASSERT(!select_lex->sj_nests.is_empty());
3848 
3849   JOIN_TAB *const tab_end= join_tab + primary_tables;
3850 
3851   for (JOIN_TAB *tab= join_tab; tab < tab_end; tab++)
3852   {
3853     for (TABLE_LIST *tr= tab->table->pos_in_table_list;
3854          tr->embedding;
3855          tr= tr->embedding)
3856     {
3857       if (tr->embedding->sj_on_expr)
3858       {
3859         tab->emb_sj_nest= tr->embedding;
3860         break;
3861       }
3862     }
3863   }
3864 }
3865 
3866 
3867 /**
3868   @brief Check if semijoin's compared types allow materialization.
3869 
3870   @param[inout] sj_nest Semi-join nest containing information about correlated
3871          expressions. Set nested_join->sjm.scan_allowed to TRUE if
3872          MaterializeScan strategy allowed. Set nested_join->sjm.lookup_allowed
3873          to TRUE if MaterializeLookup strategy allowed
3874 
3875   @details
3876     This is a temporary fix for BUG#36752.
3877 
3878     There are two subquery materialization strategies for semijoin:
3879 
3880     1. Materialize and do index lookups in the materialized table. See
3881        BUG#36752 for description of restrictions we need to put on the
3882        compared expressions.
3883 
3884        In addition, since indexes are not supported for BLOB columns,
3885        this strategy can not be used if any of the columns in the
3886        materialized table will be BLOB/GEOMETRY columns.  (Note that
3887        also columns for non-BLOB values that may be greater in size
3888        than CONVERT_IF_BIGGER_TO_BLOB, will be represented as BLOB
3889        columns.)
3890 
3891     2. Materialize and then do a full scan of the materialized table.
3892        The same criteria as for MaterializeLookup are applied, except that
3893        BLOB/GEOMETRY columns are allowed.
3894 */
3895 
3896 static
semijoin_types_allow_materialization(TABLE_LIST * sj_nest)3897 void semijoin_types_allow_materialization(TABLE_LIST *sj_nest)
3898 {
3899   DBUG_ENTER("semijoin_types_allow_materialization");
3900 
3901   DBUG_ASSERT(sj_nest->nested_join->sj_outer_exprs.elements ==
3902               sj_nest->nested_join->sj_inner_exprs.elements);
3903 
3904   if (sj_nest->nested_join->sj_outer_exprs.elements > MAX_REF_PARTS)
3905   {
3906     sj_nest->nested_join->sjm.scan_allowed= false;
3907     sj_nest->nested_join->sjm.lookup_allowed= false;
3908     DBUG_VOID_RETURN;
3909   }
3910 
3911   List_iterator<Item> it1(sj_nest->nested_join->sj_outer_exprs);
3912   List_iterator<Item> it2(sj_nest->nested_join->sj_inner_exprs);
3913 
3914   sj_nest->nested_join->sjm.scan_allowed= false;
3915   sj_nest->nested_join->sjm.lookup_allowed= false;
3916 
3917   bool blobs_involved= false;
3918   Item *outer, *inner;
3919   while (outer= it1++, inner= it2++)
3920   {
3921     if (!types_allow_materialization(outer, inner))
3922       DBUG_VOID_RETURN;
3923     blobs_involved|= inner->is_blob_field();
3924   }
3925   sj_nest->nested_join->sjm.scan_allowed=   true;
3926   sj_nest->nested_join->sjm.lookup_allowed= !blobs_involved;
3927 
3928   if (sj_nest->embedding)
3929   {
3930     DBUG_ASSERT(sj_nest->embedding->join_cond());
3931     /*
3932       There are two issues that prevent materialization strategy from being
3933       used when a semi-join nest is on the inner side of an outer join:
3934       1. If the semi-join contains dependencies to outer tables,
3935          materialize-scan strategy cannot be used.
3936       2. Make sure that executor is able to evaluate triggered conditions
3937          for semi-join materialized tables. It should be correct, but needs
3938          verification.
3939          TODO: Remove this limitation!
3940       Handle this by disabling materialization strategies:
3941     */
3942     sj_nest->nested_join->sjm.scan_allowed= false;
3943     sj_nest->nested_join->sjm.lookup_allowed= false;
3944     DBUG_VOID_RETURN;
3945   }
3946 
3947   DBUG_PRINT("info",("semijoin_types_allow_materialization: ok, allowed"));
3948 
3949   DBUG_VOID_RETURN;
3950 }
3951 
3952 
3953 /*****************************************************************************
3954   Create JOIN_TABS, make a guess about the table types,
3955   Approximate how many records will be used in each table
3956 *****************************************************************************/
3957 
3958 /**
3959   @brief
3960   Returns estimated number of rows that could be fetched by given select
3961 
3962   @param thd    thread handle
3963   @param select select to test
3964   @param table  source table
3965   @param keys   allowed keys
3966   @param limit  select limit
3967 
3968   @notes
3969     In case of valid range, a QUICK_SELECT_I object will be constructed and
3970     saved in select->quick.
3971 
3972   @return
3973     HA_POS_ERROR for derived tables/views or if an error occur.
3974     Otherwise, estimated number of rows.
3975 */
3976 
get_quick_record_count(THD * thd,SQL_SELECT * select,TABLE * table,const key_map * keys,ha_rows limit)3977 static ha_rows get_quick_record_count(THD *thd, SQL_SELECT *select,
3978 				      TABLE *table,
3979 				      const key_map *keys,ha_rows limit)
3980 {
3981   DBUG_ENTER("get_quick_record_count");
3982   uchar buff[STACK_BUFF_ALLOC];
3983   if (check_stack_overrun(thd, STACK_MIN_SIZE, buff))
3984     DBUG_RETURN(0);                           // Fatal error flag is set
3985 
3986   DBUG_ASSERT(select);
3987 
3988   TABLE_LIST *const tl= table->pos_in_table_list;
3989 
3990   // Derived tables aren't filled yet, so no stats are available.
3991   if (!tl->uses_materialization())
3992   {
3993     select->head=table;
3994     int error= select->test_quick_select(thd,
3995                                          *keys,
3996                                          0,      //empty table_map
3997                                          limit,
3998                                          false,  //don't force quick range
3999                                          ORDER::ORDER_NOT_RELEVANT);
4000     if (error == 1)
4001       DBUG_RETURN(select->quick->records);
4002     if (error == -1)
4003     {
4004       table->reginfo.impossible_range=1;
4005       DBUG_RETURN(0);
4006     }
4007     DBUG_PRINT("warning",("Couldn't use record count on const keypart"));
4008   }
4009   else if (tl->materializable_is_const())
4010   {
4011     DBUG_RETURN(tl->get_unit()->get_result()->estimated_rowcount);
4012   }
4013   DBUG_RETURN(HA_POS_ERROR);
4014 }
4015 
4016 /*
4017   Get estimated record length for semi-join materialization temptable
4018 
4019   SYNOPSIS
4020     get_tmp_table_rec_length()
4021       items  IN subquery's select list.
4022 
4023   DESCRIPTION
4024     Calculate estimated record length for semi-join materialization
4025     temptable. It's an estimate because we don't follow every bit of
4026     create_tmp_table()'s logic. This isn't necessary as the return value of
4027     this function is used only for cost calculations.
4028 
4029   RETURN
4030     Length of the temptable record, in bytes
4031 */
4032 
get_tmp_table_rec_length(List<Item> & items)4033 static uint get_tmp_table_rec_length(List<Item> &items)
4034 {
4035   uint len= 0;
4036   Item *item;
4037   List_iterator<Item> it(items);
4038   while ((item= it++))
4039   {
4040     switch (item->result_type()) {
4041     case REAL_RESULT:
4042       len += sizeof(double);
4043       break;
4044     case INT_RESULT:
4045       if (item->max_length >= (MY_INT32_NUM_DECIMAL_DIGITS - 1))
4046         len += 8;
4047       else
4048         len += 4;
4049       break;
4050     case STRING_RESULT:
4051       /* DATE/TIME and GEOMETRY fields have STRING_RESULT result type.  */
4052       if (item->is_temporal() || item->field_type() == MYSQL_TYPE_GEOMETRY)
4053         len += 8;
4054       else
4055         len += item->max_length;
4056       break;
4057     case DECIMAL_RESULT:
4058       len += 10;
4059       break;
4060     case ROW_RESULT:
4061     default:
4062       DBUG_ASSERT(0); /* purecov: deadcode */
4063       break;
4064     }
4065   }
4066   return len;
4067 }
4068 
4069 
4070 /**
4071    Writes to the optimizer trace information about dependencies between
4072    tables.
4073    @param trace  optimizer trace
4074    @param join_tabs  all JOIN_TABs of the join
4075    @param table_count how many JOIN_TABs in the 'join_tabs' array
4076 */
trace_table_dependencies(Opt_trace_context * trace,JOIN_TAB * join_tabs,uint table_count)4077 static void trace_table_dependencies(Opt_trace_context * trace,
4078                                      JOIN_TAB *join_tabs,
4079                                      uint table_count)
4080 {
4081   Opt_trace_object trace_wrapper(trace);
4082   Opt_trace_array trace_dep(trace, "table_dependencies");
4083   for (uint i= 0 ; i < table_count ; i++)
4084   {
4085     const TABLE *table= join_tabs[i].table;
4086     Opt_trace_object trace_one_table(trace);
4087     trace_one_table.add_utf8_table(table).
4088       add("row_may_be_null", table->maybe_null != 0);
4089     DBUG_ASSERT(table->map < (1ULL << table_count));
4090     for (uint j= 0; j < table_count; j++)
4091     {
4092       if (table->map & (1ULL << j))
4093       {
4094         trace_one_table.add("map_bit", j);
4095         break;
4096       }
4097     }
4098     Opt_trace_array depends_on(trace, "depends_on_map_bits");
4099     // RAND_TABLE_BIT may be in join_tabs[i].dependent, so we test all 64 bits
4100     compile_time_assert(sizeof(table->map) <= 64);
4101     for (uint j= 0; j < 64; j++)
4102     {
4103       if (join_tabs[i].dependent & (1ULL << j))
4104         depends_on.add(j);
4105     }
4106   }
4107 }
4108 
4109 
4110 /**
4111   Add to join_tab[i]->condition() "table.field IS NOT NULL" conditions
4112   we've inferred from ref/eq_ref access performed.
4113 
4114     This function is a part of "Early NULL-values filtering for ref access"
4115     optimization.
4116 
4117     Example of this optimization:
4118     For query SELECT * FROM t1,t2 WHERE t2.key=t1.field @n
4119     and plan " any-access(t1), ref(t2.key=t1.field) " @n
4120     add "t1.field IS NOT NULL" to t1's table condition. @n
4121 
4122     Description of the optimization:
4123 
      We look through equalities chosen to perform ref/eq_ref access,
      pick equalities that have form "tbl.part_of_key = othertbl.field"
      (where othertbl is a non-const table and othertbl.field may be NULL)
      and add them to conditions on corresponding tables (othertbl in this
      example).
4129 
4130       Exception from that is the case when referred_tab->join != join.
4131       I.e. don't add NOT NULL constraints from any embedded subquery.
4132       Consider this query:
4133       @code
4134       SELECT A.f2 FROM t1 LEFT JOIN t2 A ON A.f2 = f1
4135       WHERE A.f3=(SELECT MIN(f3) FROM  t2 C WHERE A.f4 = C.f4) OR A.f3 IS NULL;
4136       @endcode
4137       Here condition A.f3 IS NOT NULL is going to be added to the WHERE
4138       condition of the embedding query.
4139       Another example:
4140       SELECT * FROM t10, t11 WHERE (t10.a < 10 OR t10.a IS NULL)
4141       AND t11.b <=> t10.b AND (t11.a = (SELECT MAX(a) FROM t12
4142       WHERE t12.b = t10.a ));
4143       Here condition t10.a IS NOT NULL is going to be added.
4144       In both cases addition of NOT NULL condition will erroneously reject
4145       some rows of the result set.
4146       referred_tab->join != join constraint would disallow such additions.
4147 
4148       This optimization doesn't affect the choices that ref, range, or join
4149       optimizer make. This was intentional because this was added after 4.1
4150       was GA.
4151 
4152     Implementation overview
      1. update_ref_and_keys() accumulates info about null-rejecting
         predicates in Key_field::null_rejecting
      1.1 add_key_part saves these to Key_use.
      2. create_ref_for_key copies them to TABLE_REF.
      3. add_not_null_conds adds "x IS NOT NULL" to join_tab->m_condition of
         appropriate JOIN_TAB members.
4159 */
4160 
add_not_null_conds(JOIN * join)4161 static void add_not_null_conds(JOIN *join)
4162 {
4163   DBUG_ENTER("add_not_null_conds");
4164   for (uint i=join->const_tables ; i < join->tables ; i++)
4165   {
4166     JOIN_TAB *tab=join->join_tab+i;
4167     if ((tab->type == JT_REF || tab->type == JT_EQ_REF ||
4168          tab->type == JT_REF_OR_NULL) &&
4169         !tab->table->maybe_null)
4170     {
4171       for (uint keypart= 0; keypart < tab->ref.key_parts; keypart++)
4172       {
4173         if (tab->ref.null_rejecting & ((key_part_map)1 << keypart))
4174         {
4175           Item *item= tab->ref.items[keypart];
4176           Item *notnull;
4177           Item *real= item->real_item();
4178           DBUG_ASSERT(real->type() == Item::FIELD_ITEM);
4179           Item_field *not_null_item= (Item_field*)real;
4180           JOIN_TAB *referred_tab= not_null_item->field->table->reginfo.join_tab;
4181           /*
4182             For UPDATE queries such as:
4183             UPDATE t1 SET t1.f2=(SELECT MAX(t2.f4) FROM t2 WHERE t2.f3=t1.f1);
4184             not_null_item is the t1.f1, but it's referred_tab is 0.
4185           */
4186           if (!referred_tab || referred_tab->join != join)
4187             continue;
4188           if (!(notnull= new Item_func_isnotnull(not_null_item)))
4189             DBUG_VOID_RETURN;
4190           /*
4191             We need to do full fix_fields() call here in order to have correct
4192             notnull->const_item(). This is needed e.g. by test_quick_select
4193             when it is called from make_join_select after this function is
4194             called.
4195           */
4196           if (notnull->fix_fields(join->thd, &notnull))
4197             DBUG_VOID_RETURN;
4198           DBUG_EXECUTE("where",print_where(notnull,
4199                                            referred_tab->table->alias,
4200                                            QT_ORDINARY););
4201           referred_tab->and_with_condition(notnull, __LINE__);
4202         }
4203       }
4204     }
4205   }
4206   DBUG_VOID_RETURN;
4207 }
4208 
4209 
4210 /**
4211   Check if given expression only uses fields covered by index #keyno in the
4212   table tbl. The expression can use any fields in any other tables.
4213 
4214   The expression is guaranteed not to be AND or OR - those constructs are
4215   handled outside of this function.
4216 
4217   Restrict some function types from being pushed down to storage engine:
4218   a) Don't push down the triggered conditions. Nested outer joins execution
4219      code may need to evaluate a condition several times (both triggered and
4220      untriggered).
4221   b) Stored functions contain a statement that might start new operations (like
4222      DML statements) from within the storage engine. This does not work against
4223      all SEs.
4224   c) Subqueries might contain nested subqueries and involve more tables.
4225 
4226   @param  item           Expression to check
4227   @param  tbl            The table having the index
4228   @param  keyno          The index number
4229   @param  other_tbls_ok  TRUE <=> Fields of other non-const tables are allowed
4230 
4231   @return false if No, true if Yes
4232 */
4233 
uses_index_fields_only(Item * item,TABLE * tbl,uint keyno,bool other_tbls_ok)4234 bool uses_index_fields_only(Item *item, TABLE *tbl, uint keyno,
4235                             bool other_tbls_ok)
4236 {
4237   // Restrictions b and c.
4238   if (item->has_stored_program() || item->has_subquery())
4239     return false;
4240 
4241   if (item->const_item())
4242     return true;
4243 
4244   const Item::Type item_type= item->type();
4245 
4246   switch (item_type) {
4247   case Item::FUNC_ITEM:
4248     {
4249       Item_func *item_func= (Item_func*)item;
4250       const Item_func::Functype func_type= item_func->functype();
4251 
4252       /*
4253         Restriction a.
4254         TODO: Consider cloning the triggered condition and using the copies
4255         for:
4256         1. push the first copy down, to have most restrictive index condition
4257            possible.
4258         2. Put the second copy into tab->m_condition.
4259       */
4260       if (func_type == Item_func::TRIG_COND_FUNC)
4261         return false;
4262 
4263       /* This is a function, apply condition recursively to arguments */
4264       if (item_func->argument_count() > 0)
4265       {
4266         Item **item_end= (item_func->arguments()) + item_func->argument_count();
4267         for (Item **child= item_func->arguments(); child != item_end; child++)
4268         {
4269           if (!uses_index_fields_only(*child, tbl, keyno, other_tbls_ok))
4270             return FALSE;
4271         }
4272       }
4273       return TRUE;
4274     }
4275   case Item::COND_ITEM:
4276     {
4277       /*
4278         This is a AND/OR condition. Regular AND/OR clauses are handled by
4279         make_cond_for_index() which will chop off the part that can be
4280         checked with index. This code is for handling non-top-level AND/ORs,
4281         e.g. func(x AND y).
4282       */
4283       List_iterator<Item> li(*((Item_cond*)item)->argument_list());
4284       Item *item;
4285       while ((item=li++))
4286       {
4287         if (!uses_index_fields_only(item, tbl, keyno, other_tbls_ok))
4288           return FALSE;
4289       }
4290       return TRUE;
4291     }
4292   case Item::FIELD_ITEM:
4293     {
4294       Item_field *item_field= (Item_field*)item;
4295       if (item_field->field->table != tbl)
4296         return other_tbls_ok;
4297       /*
4298         The below is probably a repetition - the first part checks the
4299         other two, but let's play it safe:
4300       */
4301       return item_field->field->part_of_key.is_set(keyno) &&
4302              item_field->field->type() != MYSQL_TYPE_GEOMETRY &&
4303              item_field->field->type() != MYSQL_TYPE_BLOB;
4304     }
4305   case Item::REF_ITEM:
4306     return uses_index_fields_only(item->real_item(), tbl, keyno,
4307                                   other_tbls_ok);
4308   default:
4309     return FALSE; /* Play it safe, don't push unknown non-const items */
4310   }
4311 }
4312 
4313 
4314 /**
4315   Optimize semi-join nests that could be run with sj-materialization
4316 
4317   @param join           The join to optimize semi-join nests for
4318 
4319   @details
4320     Optimize each of the semi-join nests that can be run with
4321     materialization. For each of the nests, we
4322      - Generate the best join order for this "sub-join" and remember it;
4323      - Remember the sub-join execution cost (it's part of materialization
4324        cost);
4325      - Calculate other costs that will be incurred if we decide
4326        to use materialization strategy for this semi-join nest.
4327 
4328     All obtained information is saved and will be used by the main join
4329     optimization pass.
4330 
4331   @return false if successful, true if error
4332 */
4333 
optimize_semijoin_nests_for_materialization(JOIN * join)4334 static bool optimize_semijoin_nests_for_materialization(JOIN *join)
4335 {
4336   DBUG_ENTER("optimize_semijoin_nests_for_materialization");
4337   List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests);
4338   TABLE_LIST *sj_nest;
4339   Opt_trace_context * const trace= &join->thd->opt_trace;
4340 
4341   while ((sj_nest= sj_list_it++))
4342   {
4343     /* As a precaution, reset pointers that were used in prior execution */
4344     sj_nest->nested_join->sjm.positions= NULL;
4345 
4346     /* Calculate the cost of materialization if materialization is allowed. */
4347     if (join->thd->optimizer_switch_flag(OPTIMIZER_SWITCH_SEMIJOIN) &&
4348         join->thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MATERIALIZATION))
4349     {
4350       /* A semi-join nest should not contain tables marked as const */
4351       DBUG_ASSERT(!(sj_nest->sj_inner_tables & join->const_table_map));
4352 
4353       Opt_trace_object trace_wrapper(trace);
4354       Opt_trace_object
4355         trace_sjmat(trace, "execution_plan_for_potential_materialization");
4356       Opt_trace_array trace_sjmat_steps(trace, "steps");
4357       /*
4358         Try semijoin materialization if the semijoin is classified as
4359         non-trivially-correlated.
4360       */
4361       if (sj_nest->nested_join->sj_corr_tables)
4362         continue;
4363       /*
4364         Check whether data types allow execution with materialization.
4365       */
4366       semijoin_types_allow_materialization(sj_nest);
4367 
4368       if (!sj_nest->nested_join->sjm.scan_allowed &&
4369           !sj_nest->nested_join->sjm.lookup_allowed)
4370         continue;
4371 
4372       if (Optimize_table_order(join->thd, join, sj_nest).choose_table_order())
4373         DBUG_RETURN(true);
4374       const uint n_tables= my_count_bits(sj_nest->sj_inner_tables);
4375       calculate_materialization_costs(join, sj_nest, n_tables,
4376                                       &sj_nest->nested_join->sjm);
4377       /*
4378         Cost data is in sj_nest->nested_join->sjm. We also need to save the
4379         plan:
4380       */
4381       if (!(sj_nest->nested_join->sjm.positions=
4382             (st_position*)join->thd->alloc(sizeof(st_position)*n_tables)))
4383         DBUG_RETURN(true);
4384       memcpy(sj_nest->nested_join->sjm.positions,
4385              join->best_positions + join->const_tables,
4386              sizeof(st_position) * n_tables);
4387     }
4388   }
4389   DBUG_RETURN(false);
4390 }
4391 
4392 
4393 /*
4394   Check if table's Key_use elements have an eq_ref(outer_tables) candidate
4395 
4396   SYNOPSIS
4397     find_eq_ref_candidate()
4398       table             Table to be checked
4399       sj_inner_tables   Bitmap of inner tables. eq_ref(inner_table) doesn't
4400                         count.
4401 
4402   DESCRIPTION
4403     Check if table's Key_use elements have an eq_ref(outer_tables) candidate
4404 
4405   TODO
4406     Check again if it is feasible to factor common parts with constant table
4407     search
4408 
4409   RETURN
4410     TRUE  - There exists an eq_ref(outer-tables) candidate
4411     FALSE - Otherwise
4412 */
4413 
find_eq_ref_candidate(TABLE * table,table_map sj_inner_tables)4414 static bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables)
4415 {
4416   Key_use *keyuse= table->reginfo.join_tab->keyuse;
4417   uint key;
4418 
4419   if (keyuse)
4420   {
4421     while (1) /* For each key */
4422     {
4423       key= keyuse->key;
4424       KEY *keyinfo= table->key_info + key;
4425       key_part_map bound_parts= 0;
4426       if ((keyinfo->flags & (HA_NOSAME)) == HA_NOSAME)
4427       {
4428         do  /* For all equalities on all key parts */
4429         {
4430           /* Check if this is "t.keypart = expr(outer_tables) */
4431           if (!(keyuse->used_tables & sj_inner_tables) &&
4432               !(keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL))
4433           {
4434             /*
4435               Consider only if the resulting condition does not pass a NULL
4436               value through. Especially needed for a UNIQUE index on NULLable
4437               columns where a duplicate row is possible with NULL values.
4438             */
4439             if (keyuse->null_rejecting || !keyuse->val->maybe_null ||
4440                 !keyinfo->key_part[keyuse->keypart].field->maybe_null())
4441               bound_parts|= (key_part_map)1 << keyuse->keypart;
4442           }
4443           keyuse++;
4444         } while (keyuse->key == key && keyuse->table == table);
4445 
4446         if (bound_parts == LOWER_BITS(uint, keyinfo->user_defined_key_parts))
4447           return TRUE;
4448         if (keyuse->table != table)
4449           return FALSE;
4450       }
4451       else
4452       {
4453         do
4454         {
4455           keyuse++;
4456           if (keyuse->table != table)
4457             return FALSE;
4458         }
4459         while (keyuse->key == key);
4460       }
4461     }
4462   }
4463   return FALSE;
4464 }
4465 
4466 
4467 /**
4468   Pull tables out of semi-join nests based on functional dependencies
4469 
4470   @param join  The join where to do the semi-join table pullout
4471 
4472   @return False if successful, true if error (Out of memory)
4473 
4474   @details
4475     Pull tables out of semi-join nests based on functional dependencies,
4476     ie. if a table is accessed via eq_ref(outer_tables).
4477     The function may be called several times, the caller is responsible
4478     for setting up proper key information that this function acts upon.
4479 
4480     PRECONDITIONS
4481     When this function is called, the join may have several semi-join nests
4482     but it is guaranteed that one semi-join nest does not contain another.
4483     For functionally dependent tables to be pulled out, key information must
4484     have been calculated (see update_ref_and_keys()).
4485 
4486     POSTCONDITIONS
4487      * Tables that were pulled out are removed from the semi-join nest they
4488        belonged to and added to the parent join nest.
4489      * For these tables, the used_tables and not_null_tables fields of
4490        the semi-join nest they belonged to will be adjusted.
4491        The semi-join nest is also marked as correlated, and
4492        sj_corr_tables and sj_depends_on are adjusted if necessary.
4493      * Semi-join nests' sj_inner_tables is set equal to used_tables
4494 
4495     NOTE
4496     Table pullout may make uncorrelated subquery correlated. Consider this
4497     example:
4498 
4499      ... WHERE oe IN (SELECT it1.primary_key WHERE p(it1, it2) ... )
4500 
4501     here table it1 can be pulled out (we have it1.primary_key=oe which gives
4502     us functional dependency). Once it1 is pulled out, all references to it1
4503     from p(it1, it2) become references to outside of the subquery and thus
4504     make the subquery (i.e. its semi-join nest) correlated.
4505     Making the subquery (i.e. its semi-join nest) correlated prevents us from
4506     using Materialization or LooseScan to execute it.
4507 */
4508 
static bool pull_out_semijoin_tables(JOIN *join)
{
  TABLE_LIST *sj_nest;
  DBUG_ENTER("pull_out_semijoin_tables");

  /* The caller must only invoke this when there is at least one sj-nest. */
  DBUG_ASSERT(!join->select_lex->sj_nests.is_empty());

  List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests);
  Opt_trace_context * const trace= &join->thd->opt_trace;
  Opt_trace_object trace_wrapper(trace);
  Opt_trace_array trace_pullout(trace, "pulled_out_semijoin_tables");

  /* Try pulling out tables from each semi-join nest */
  while ((sj_nest= sj_list_it++))
  {
    /* Map of the tables selected for pullout from this nest so far. */
    table_map pulled_tables= 0;
    List_iterator<TABLE_LIST> child_li(sj_nest->nested_join->join_list);
    TABLE_LIST *tbl;
    /*
      Calculate set of tables within this semi-join nest that have
      other dependent tables
    */
    table_map dep_tables= 0;
    while ((tbl= child_li++))
    {
      TABLE *const table= tbl->table;
      if (table &&
         (table->reginfo.join_tab->dependent &
          sj_nest->nested_join->used_tables))
        dep_tables|= table->reginfo.join_tab->dependent;
    }
    /*
      Find which tables we can pull out based on key dependency data.
      Note that pulling one table out can allow us to pull out some
      other tables too.
      The scan is therefore repeated until a full pass over the nest
      pulls out nothing.
    */
    bool pulled_a_table;
    do
    {
      pulled_a_table= FALSE;
      child_li.rewind();
      while ((tbl= child_li++))
      {
        /* Consider only tables not yet pulled and not in dep_tables. */
        if (tbl->table &&
            !(pulled_tables & tbl->table->map) &&
            !(dep_tables & tbl->table->map))
        {
          /*
            Tables already pulled out count as outer tables here: they are
            removed from the inner-table map passed to the check.
          */
          if (find_eq_ref_candidate(tbl->table,
                                    sj_nest->nested_join->used_tables &
                                    ~pulled_tables))
          {
            pulled_a_table= TRUE;
            pulled_tables |= tbl->table->map;
            Opt_trace_object(trace).add_utf8_table(tbl->table).
              add("functionally_dependent", true);
            /*
              Pulling a table out of an uncorrelated subquery in general
              makes it correlated. See the NOTE to this function.
            */
            sj_nest->nested_join->sj_corr_tables|= tbl->table->map;
            sj_nest->nested_join->sj_depends_on|= tbl->table->map;
          }
        }
      }
    } while (pulled_a_table);

    child_li.rewind();
    /*
      Move the pulled out TABLE_LIST elements to the parents.
      First drop them from the nest's table maps.
    */
    sj_nest->nested_join->used_tables&= ~pulled_tables;
    sj_nest->nested_join->not_null_tables&= ~pulled_tables;

    /* sj_inner_tables is a copy of nested_join->used_tables */
    sj_nest->sj_inner_tables= sj_nest->nested_join->used_tables;

    if (pulled_tables)
    {
      /*
        The destination is the join list that contains the nest: that of
        the embedding nest if there is one, else the top-level join list.
      */
      List<TABLE_LIST> *upper_join_list= (sj_nest->embedding != NULL) ?
          &sj_nest->embedding->nested_join->join_list :
          &join->select_lex->top_join_list;

      /*
        NOTE(review): presumably switches to the statement arena so that the
        list changes below survive re-execution of a prepared statement -
        confirm against Prepared_stmt_arena_holder.
      */
      Prepared_stmt_arena_holder ps_arena_holder(join->thd);

      while ((tbl= child_li++))
      {
        /* used_tables no longer contains the tables that were pulled out. */
        if (tbl->table &&
            !(sj_nest->nested_join->used_tables & tbl->table->map))
        {
          /*
            Pull the table up in the same way as simplify_joins() does:
            update join_list and embedding pointers but keep next[_local]
            pointers.
          */
          child_li.remove();

          if (upper_join_list->push_back(tbl))
            DBUG_RETURN(TRUE);

          tbl->join_list= upper_join_list;
          tbl->embedding= sj_nest->embedding;
        }
      }

      /* Remove the sj-nest itself if we've removed everything from it */
      if (!sj_nest->nested_join->used_tables)
      {
        List_iterator<TABLE_LIST> li(*upper_join_list);
        /* Find the sj_nest in the list. */
        while (sj_nest != li++)
        {}
        li.remove();
        /* Also remove it from the list of SJ-nests: */
        sj_list_it.remove();
      }
    }
  }
  DBUG_RETURN(FALSE);
}
4628 
4629 
/*****************************************************************************
  Check which keys are used and which tables reference which other tables
  Updates in stat:
	  keys	     Bitmap of all used keys
	  const_keys Bitmap of all keys which may be used with quick_select
	  keyuse     Pointer to possible keys
*****************************************************************************/
4637 
4638 /// Used when finding key fields
4639 struct Key_field {
Key_fieldKey_field4640   Key_field(Field *field, Item *val, uint level, uint optimize, bool eq_func,
4641             bool null_rejecting, bool *cond_guard, uint sj_pred_no)
4642   : field(field), val(val), level(level), optimize(optimize), eq_func(eq_func),
4643   null_rejecting(null_rejecting), cond_guard(cond_guard),
4644   sj_pred_no(sj_pred_no)
4645   {}
4646   Field		*field;
4647   Item		*val;			///< May be empty if diff constant
4648   uint		level;
4649   uint		optimize; // KEY_OPTIMIZE_*
4650   bool		eq_func;
4651   /**
4652     If true, the condition this struct represents will not be satisfied
4653     when val IS NULL.
4654     @sa Key_use::null_rejecting .
4655   */
4656   bool          null_rejecting;
4657   bool          *cond_guard;                    ///< @sa Key_use::cond_guard
4658   uint          sj_pred_no;                     ///< @sa Key_use::sj_pred_no
4659 };
4660 
/* Bit flags that can be set in Key_field::optimize */
4662 #define KEY_OPTIMIZE_EXISTS		1
4663 #define KEY_OPTIMIZE_REF_OR_NULL	2
4664 
4665 /**
4666   Merge new key definitions to old ones, remove those not used in both.
4667 
4668   This is called for OR between different levels.
4669 
  To be able to do 'ref_or_null' we merge a comparison of a column
  and 'column IS NULL' to one test.  This is useful for sub select queries
  that are internally transformed to something like:
4673 
4674   @code
4675   SELECT * FROM t1 WHERE t1.key=outer_ref_field or t1.key IS NULL
4676   @endcode
4677 
4678   Key_field::null_rejecting is processed as follows: @n
4679   result has null_rejecting=true if it is set for both ORed references.
4680   for example:
4681   -   (t2.key = t1.field OR t2.key  =  t1.field) -> null_rejecting=true
4682   -   (t2.key = t1.field OR t2.key <=> t1.field) -> null_rejecting=false
4683 
4684   @todo
4685     The result of this is that we're missing some 'ref' accesses.
4686     OptimizerTeam: Fix this
4687 */
4688 
static Key_field *
merge_key_fields(Key_field *start, Key_field *new_fields, Key_field *end,
		 uint and_level)
{
  /*
    Array layout on entry: [start, new_fields) holds the old entries and
    [new_fields, end) holds the new ones. The old part is compacted in place
    and the returned pointer is the new end of the merged array.
  */
  if (start == new_fields)
    return start;				// Impossible or
  if (new_fields == end)
    return start;				// No new fields, skip all

  /* One past the last old entry that is still kept. */
  Key_field *first_free=new_fields;

  /* Mark all found fields in old array */
  for (; new_fields != end ; new_fields++)
  {
    for (Key_field *old=start ; old != first_free ; old++)
    {
      if (old->field == new_fields->field)
      {
        /*
          NOTE: below const_item() call really works as "!used_tables()", i.e.
          it can return FALSE where it is feasible to make it return TRUE.

          The cause is as follows: Some of the tables are already known to be
          const tables (the detection code is in make_join_statistics(),
          above the update_ref_and_keys() call), but we didn't propagate
          information about this: TABLE::const_table is not set to TRUE, and
          Item::update_used_tables() hasn't been called for each item.
          The result of this is that we're missing some 'ref' accesses.
          TODO: OptimizerTeam: Fix this
        */
	if (!new_fields->val->const_item())
	{
	  /*
	    If the value matches, we can use the key reference.
	    If not, we keep it until we have examined all new values
	  */
	  if (old->val->eq(new_fields->val, old->field->binary()))
	  {
	    /*
	      Same value on both sides of the OR: mark the old entry as
	      seen. KEY_OPTIMIZE_EXISTS survives only if set on both sides,
	      KEY_OPTIMIZE_REF_OR_NULL if set on either side.
	    */
	    old->level= and_level;
	    old->optimize= ((old->optimize & new_fields->optimize &
			     KEY_OPTIMIZE_EXISTS) |
			    ((old->optimize | new_fields->optimize) &
			     KEY_OPTIMIZE_REF_OR_NULL));
            old->null_rejecting= (old->null_rejecting &&
                                  new_fields->null_rejecting);
	  }
	}
	else if (old->eq_func && new_fields->eq_func &&
                 old->val->eq_by_collation(new_fields->val,
                                           old->field->binary(),
                                           old->field->charset()))

	{
	  /*
	    Both are equalities with constants that compare equal under the
	    field's collation: merged exactly like the branch above.
	  */
	  old->level= and_level;
	  old->optimize= ((old->optimize & new_fields->optimize &
			   KEY_OPTIMIZE_EXISTS) |
			  ((old->optimize | new_fields->optimize) &
			   KEY_OPTIMIZE_REF_OR_NULL));
          old->null_rejecting= (old->null_rejecting &&
                                new_fields->null_rejecting);
	}
	else if (old->eq_func && new_fields->eq_func &&
		 ((old->val->const_item() && old->val->is_null()) ||
                  new_fields->val->is_null()))
	{
	  /* field = expression OR field IS NULL */
	  old->level= and_level;
	  old->optimize= KEY_OPTIMIZE_REF_OR_NULL;
	  /*
            Remember the NOT NULL value unless the value does not depend
            on other tables.
          */
	  if (!old->val->used_tables() && old->val->is_null())
	    old->val= new_fields->val;
          /* The referred expression can be NULL: */
          old->null_rejecting= 0;
	}
	else
	{
	  /*
	    We are comparing two different const.  In this case we can't
	    use a key-lookup on this so it's better to remove the value
	    and let the range optimizer handle it
	  */
	  if (old == --first_free)		// If last item
	    break;
	  *old= *first_free;			// Remove old value
	  old--;				// Retry this value
	}
      }
    }
  }
  /* Remove all not used items */
  for (Key_field *old=start ; old != first_free ;)
  {
    if (old->level != and_level)
    {						// Not used in all levels
      if (old == --first_free)
	break;
      /*
        Overwrite with the last kept entry and re-check this slot, hence
        no increment of old.
      */
      *old= *first_free;			// Remove old value
      continue;
    }
    old++;
  }
  return first_free;
}
4795 
4796 
4797 /**
4798   Given a field, return its index in semi-join's select list, or UINT_MAX
4799 
4800   @param field Field that we are looking up table for
4801 
4802   @retval =UINT_MAX Field is not from a semijoin-transformed subquery
4803   @retval <UINT_MAX Index in select list of subquery
4804 
4805   @details
4806   Given a field, find its table; then see if the table is within a
4807   semi-join nest and if the field was in select list of the subquery
4808   (if subquery was part of a quantified comparison predicate), or
4809   the field was a result of subquery decorrelation.
4810   If it was, then return the field's index in the select list.
4811   The value is used by LooseScan strategy.
4812 */
4813 
get_semi_join_select_list_index(Field * field)4814 static uint get_semi_join_select_list_index(Field *field)
4815 {
4816   TABLE_LIST *emb_sj_nest= field->table->pos_in_table_list->embedding;
4817   if (emb_sj_nest && emb_sj_nest->sj_on_expr)
4818   {
4819     List<Item> &items= emb_sj_nest->nested_join->sj_inner_exprs;
4820     List_iterator<Item> it(items);
4821     for (uint i= 0; i < items.elements; i++)
4822     {
4823       Item *sel_item= it++;
4824       if (sel_item->type() == Item::FIELD_ITEM &&
4825           ((Item_field*)sel_item)->field->eq(field))
4826         return i;
4827     }
4828   }
4829   return UINT_MAX;
4830 }
4831 
4832 /**
4833    @brief
4834    If EXPLAIN EXTENDED, add warning that an index cannot be used for
4835    ref access
4836 
4837    @details
4838    If EXPLAIN EXTENDED, add a warning for each index that cannot be
4839    used for ref access due to either type conversion or different
4840    collations on the field used for comparison
4841 
4842    Example type conversion (char compared to int):
4843 
4844    CREATE TABLE t1 (url char(1) PRIMARY KEY);
4845    SELECT * FROM t1 WHERE url=1;
4846 
4847    Example different collations (danish vs german2):
4848 
4849    CREATE TABLE t1 (url char(1) PRIMARY KEY) collate latin1_danish_ci;
4850    SELECT * FROM t1 WHERE url='1' collate latin1_german2_ci;
4851 
4852    @param thd                Thread for the connection that submitted the query
4853    @param field              Field used in comparision
4854    @param cant_use_indexes   Indexes that cannot be used for lookup
4855  */
4856 static void
warn_index_not_applicable(THD * thd,const Field * field,const key_map cant_use_index)4857 warn_index_not_applicable(THD *thd, const Field *field,
4858                           const key_map cant_use_index)
4859 {
4860   if (thd->lex->describe & DESCRIBE_EXTENDED)
4861     for (uint j=0 ; j < field->table->s->keys ; j++)
4862       if (cant_use_index.is_set(j))
4863         push_warning_printf(thd,
4864                             Sql_condition::WARN_LEVEL_WARN,
4865                             ER_WARN_INDEX_NOT_APPLICABLE,
4866                             ER(ER_WARN_INDEX_NOT_APPLICABLE),
4867                             "ref",
4868                             field->table->key_info[j].name,
4869                             field->field_name);
4870 }
4871 
4872 /**
4873   Add a possible key to array of possible keys if it's usable as a key
4874 
4875     @param key_fields      Pointer to add key, if usable
4876     @param and_level       And level, to be stored in Key_field
4877     @param cond            Condition predicate
4878     @param field           Field used in comparision
4879     @param eq_func         True if we used =, <=> or IS NULL
4880     @param value           Array of values used for comparison with field
4881     @param num_values      Number of elements in the array of values
4882     @param usable_tables   Tables which can be used for key optimization
4883     @param sargables       IN/OUT Array of found sargable candidates
4884 
4885   @note
4886     If we are doing a NOT NULL comparison on a NOT NULL field in a outer join
4887     table, we store this to be able to do not exists optimization later.
4888 
4889   @returns
4890     *key_fields is incremented if we stored a key in the array
4891 */
4892 
4893 static void
add_key_field(Key_field ** key_fields,uint and_level,Item_func * cond,Field * field,bool eq_func,Item ** value,uint num_values,table_map usable_tables,SARGABLE_PARAM ** sargables)4894 add_key_field(Key_field **key_fields,uint and_level, Item_func *cond,
4895               Field *field, bool eq_func, Item **value, uint num_values,
4896               table_map usable_tables, SARGABLE_PARAM **sargables)
4897 {
4898   DBUG_PRINT("info",("add_key_field for field %s",field->field_name));
4899   uint exists_optimize= 0;
4900   TABLE_LIST *table= field->table->pos_in_table_list;
4901 
4902   if (field->table->reginfo.join_tab == NULL)
4903   {
4904     /*
4905        Due to a bug in IN-to-EXISTS (grep for real_item() in item_subselect.cc
4906        for more info), an index over a field from an outer query might be
4907        considered here, which is incorrect. Their query has been fully
4908        optimized already so their reginfo.join_tab is NULL and we reject them.
4909     */
4910     return;
4911   }
4912 
4913   if (!table->derived_keys_ready && table->uses_materialization() &&
4914       !field->table->is_created() &&
4915       table->update_derived_keys(field, value, num_values))
4916     return;
4917   if (!(field->flags & PART_KEY_FLAG))
4918   {
4919     // Don't remove column IS NULL on a LEFT JOIN table
4920     if (!eq_func || (*value)->type() != Item::NULL_ITEM ||
4921         !field->table->maybe_null || field->real_maybe_null())
4922       return;					// Not a key. Skip it
4923     exists_optimize= KEY_OPTIMIZE_EXISTS;
4924     DBUG_ASSERT(num_values == 1);
4925   }
4926   else
4927   {
4928     table_map used_tables=0;
4929     bool optimizable=0;
4930     for (uint i=0; i<num_values; i++)
4931     {
4932       used_tables|=(value[i])->used_tables();
4933       if (!((value[i])->used_tables() & (field->table->map | RAND_TABLE_BIT)))
4934         optimizable=1;
4935     }
4936     if (!optimizable)
4937       return;
4938     if (!(usable_tables & field->table->map))
4939     {
4940       if (!eq_func || (*value)->type() != Item::NULL_ITEM ||
4941           !field->table->maybe_null || field->real_maybe_null())
4942 	return;					// Can't use left join optimize
4943       exists_optimize= KEY_OPTIMIZE_EXISTS;
4944     }
4945     else
4946     {
4947       JOIN_TAB *stat=field->table->reginfo.join_tab;
4948       key_map possible_keys=field->key_start;
4949       possible_keys.intersect(field->table->keys_in_use_for_query);
4950       stat[0].keys.merge(possible_keys);             // Add possible keys
4951 
4952       /*
4953 	Save the following cases:
4954 	Field op constant
4955 	Field LIKE constant where constant doesn't start with a wildcard
4956 	Field = field2 where field2 is in a different table
4957 	Field op formula
4958 	Field IS NULL
4959 	Field IS NOT NULL
4960          Field BETWEEN ...
4961          Field IN ...
4962       */
4963       stat[0].key_dependent|=used_tables;
4964 
4965       bool is_const=1;
4966       for (uint i=0; i<num_values; i++)
4967       {
4968         if (!(is_const&= value[i]->const_item()))
4969           break;
4970       }
4971       if (is_const)
4972         stat[0].const_keys.merge(possible_keys);
4973       else if (!eq_func)
4974       {
4975         /*
4976           Save info to be able check whether this predicate can be
4977           considered as sargable for range analisis after reading const tables.
4978           We do not save info about equalities as update_const_equal_items
4979           will take care of updating info on keys from sargable equalities.
4980         */
4981         (*sargables)--;
4982         (*sargables)->field= field;
4983         (*sargables)->arg_value= value;
4984         (*sargables)->num_values= num_values;
4985       }
4986       /*
4987 	We can't always use indexes when comparing a string index to a
4988 	number. cmp_type() is checked to allow compare of dates to numbers.
4989         eq_func is NEVER true when num_values > 1
4990        */
4991       if (!eq_func)
4992         return;
4993       if (field->result_type() == STRING_RESULT)
4994       {
4995         if ((*value)->result_type() != STRING_RESULT)
4996         {
4997           if (field->cmp_type() != (*value)->result_type())
4998           {
4999             warn_index_not_applicable(stat->join->thd, field, possible_keys);
5000             return;
5001           }
5002         }
5003         else
5004         {
5005           /*
5006             Can't optimize datetime_column=indexed_varchar_column,
5007             also can't use indexes if the effective collation
5008             of the operation differ from the field collation.
5009             IndexedTimeComparedToDate: can't optimize
5010             'indexed_time = temporal_expr_with_date_part' because:
5011             - without index, a TIME column with value '48:00:00' is equal to a
5012             DATETIME column with value 'CURDATE() + 2 days'
5013             - with ref access into the TIME column, CURDATE() + 2 days becomes
5014             "00:00:00" (Field_timef::store_internal() simply extracts the time
5015             part from the datetime) which is a lookup key which does not match
5016             "48:00:00"; so ref access is not be able to give the same result
5017             as without index, so is disabled.
5018             On the other hand, we can optimize indexed_datetime = time
5019             because Field_temporal_with_date::store_time() will convert
5020             48:00:00 to CURDATE() + 2 days which is the correct lookup key.
5021           */
5022           if ((!field->is_temporal() && value[0]->is_temporal()) ||
5023               (field->cmp_type() == STRING_RESULT &&
5024                field->charset() != cond->compare_collation()) ||
5025               field_time_cmp_date(field, value[0]))
5026           {
5027             warn_index_not_applicable(stat->join->thd, field, possible_keys);
5028             return;
5029           }
5030         }
5031       }
5032     }
5033   }
5034   /*
5035     For the moment eq_func is always true. This slot is reserved for future
5036     extensions where we want to remembers other things than just eq comparisons
5037   */
5038   DBUG_ASSERT(eq_func);
5039   /*
5040     If the condition has form "tbl.keypart = othertbl.field" and
5041     othertbl.field can be NULL, there will be no matches if othertbl.field
5042     has NULL value.
5043     We use null_rejecting in add_not_null_conds() to add
5044     'othertbl.field IS NOT NULL' to tab->m_condition, if this is not an outer
5045     join. We also use it to shortcut reading "tbl" when othertbl.field is
5046     found to be a NULL value (in join_read_always_key() and BKA).
5047   */
5048   bool null_rejecting;
5049   Item *real= (*value)->real_item();
5050   if (((cond->functype() == Item_func::EQ_FUNC) ||
5051        (cond->functype() == Item_func::MULT_EQUAL_FUNC)) &&
5052       (real->type() == Item::FIELD_ITEM) &&
5053       ((Item_field*)real)->field->maybe_null())
5054     null_rejecting= true;
5055   else
5056     null_rejecting= false;
5057 
5058   /* Store possible eq field */
5059   new (*key_fields)
5060     Key_field(field, *value, and_level, exists_optimize, eq_func,
5061               null_rejecting, NULL, get_semi_join_select_list_index(field));
5062   (*key_fields)++;
5063 }
5064 
5065 /**
5066   Add possible keys to array of possible keys originated from a simple
5067   predicate.
5068 
5069     @param  key_fields     Pointer to add key, if usable
5070     @param  and_level      And level, to be stored in Key_field
5071     @param  cond           Condition predicate
5072     @param  field          Field used in comparision
5073     @param  eq_func        True if we used =, <=> or IS NULL
5074     @param  value          Value used for comparison with field
5075                            Is NULL for BETWEEN and IN
5076     @param  usable_tables  Tables which can be used for key optimization
5077     @param  sargables      IN/OUT Array of found sargable candidates
5078 
5079   @note
5080     If field items f1 and f2 belong to the same multiple equality and
5081     a key is added for f1, the the same key is added for f2.
5082 
5083   @returns
5084     *key_fields is incremented if we stored a key in the array
5085 */
5086 
5087 static void
add_key_equal_fields(Key_field ** key_fields,uint and_level,Item_func * cond,Item_field * field_item,bool eq_func,Item ** val,uint num_values,table_map usable_tables,SARGABLE_PARAM ** sargables)5088 add_key_equal_fields(Key_field **key_fields, uint and_level,
5089                      Item_func *cond, Item_field *field_item,
5090                      bool eq_func, Item **val,
5091                      uint num_values, table_map usable_tables,
5092                      SARGABLE_PARAM **sargables)
5093 {
5094   Field *field= field_item->field;
5095   add_key_field(key_fields, and_level, cond, field,
5096                 eq_func, val, num_values, usable_tables, sargables);
5097   Item_equal *item_equal= field_item->item_equal;
5098   if (item_equal)
5099   {
5100     /*
5101       Add to the set of possible key values every substitution of
5102       the field for an equal field included into item_equal
5103     */
5104     Item_equal_iterator it(*item_equal);
5105     Item_field *item;
5106     while ((item= it++))
5107     {
5108       if (!field->eq(item->field))
5109       {
5110         add_key_field(key_fields, and_level, cond, item->field,
5111                       eq_func, val, num_values, usable_tables,
5112                       sargables);
5113       }
5114     }
5115   }
5116 }
5117 
5118 
5119 /**
5120   Check if an expression is a non-outer field.
5121 
5122   Checks if an expression is a field and belongs to the current select.
5123 
5124   @param   field  Item expression to check
5125 
5126   @return boolean
5127      @retval TRUE   the expression is a local field
5128      @retval FALSE  it's something else
5129 */
5130 
5131 static bool
is_local_field(Item * field)5132 is_local_field (Item *field)
5133 {
5134   return field->real_item()->type() == Item::FIELD_ITEM
5135     && !(field->used_tables() & OUTER_REF_TABLE_BIT)
5136     && !((Item_field *)field->real_item())->depended_from;
5137 }
5138 
5139 
/**
  Walk a condition and add a Key_field entry for every predicate in it
  that may be usable for index access.

  AND/OR conditions are processed recursively, trigger conditions are
  unwrapped, and simple predicates are dispatched on
  Item_func::select_optimize().

  @param          join           Join being optimized; consulted when
                                 handling trigger conditions
  @param[in,out]  key_fields     Current end of the Key_field array; moved
                                 past every element that is added
  @param[in,out]  and_level      And-level counter; incremented while
                                 processing OR conditions
  @param          cond           Condition to analyze
  @param          usable_tables  Tables which can be used for key optimization
  @param[in,out]  sargables      Array of found sargable candidates
*/
static void
add_key_fields(JOIN *join, Key_field **key_fields, uint *and_level,
               Item *cond, table_map usable_tables,
               SARGABLE_PARAM **sargables)
{
  if (cond->type() == Item_func::COND_ITEM)
  {
    List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
    Key_field *org_key_fields= *key_fields;

    if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
    {
      /*
        AND: collect candidates from every argument, then stamp all of the
        newly added entries with the current and-level.
      */
      Item *item;
      while ((item=li++))
        add_key_fields(join, key_fields, and_level, item, usable_tables,
                       sargables);
      for (; org_key_fields != *key_fields ; org_key_fields++)
	org_key_fields->level= *and_level;
    }
    else
    {
      /*
        Not AND (i.e. OR): the first argument is processed under a new
        and-level. Every further argument is processed under yet another
        level and its candidates are then combined with the ones collected
        so far by merge_key_fields(), which returns the new end of the
        array.
      */
      (*and_level)++;
      add_key_fields(join, key_fields, and_level, li++, usable_tables,
                     sargables);
      Item *item;
      while ((item=li++))
      {
	Key_field *start_key_fields= *key_fields;
	(*and_level)++;
        add_key_fields(join, key_fields, and_level, item, usable_tables,
                       sargables);
	*key_fields=merge_key_fields(org_key_fields,start_key_fields,
				     *key_fields,++(*and_level));
      }
    }
    return;
  }

  /*
    Subquery optimization: Conditions that are pushed down into subqueries
    are wrapped into Item_func_trig_cond. We process the wrapped condition
    but need to set cond_guard for Key_use elements generated from it.
  */
  {
    if (cond->type() == Item::FUNC_ITEM &&
        ((Item_func*)cond)->functype() == Item_func::TRIG_COND_FUNC)
    {
      Item *cond_arg= ((Item_func*)cond)->arguments()[0];
      /*
        The wrapped condition is only analyzed for an IN subquery that is
        not a UNION and has neither GROUP BY nor ORDER BY; in every other
        case the trigger condition yields no candidates.
      */
      if (!join->group_list && !join->order &&
          join->unit->item &&
          join->unit->item->substype() == Item_subselect::IN_SUBS &&
          !join->unit->is_union())
      {
        Key_field *save= *key_fields;
        add_key_fields(join, key_fields, and_level, cond_arg, usable_tables,
                       sargables);
        // Indicate that this ref access candidate is for subquery lookup:
        for (; save != *key_fields; save++)
          save->cond_guard= ((Item_func_trig_cond*)cond)->get_trig_var();
      }
      return;
    }
  }

  /* If item is of type 'field op field/constant' add it to key_fields */
  if (cond->type() != Item::FUNC_ITEM)
    return;
  Item_func *cond_func= (Item_func*) cond;
  switch (cond_func->select_optimize()) {
  case Item_func::OPTIMIZE_NONE:
    break;
  case Item_func::OPTIMIZE_KEY:
  {
    Item **values;
    /*
      Build list of possible keys for 'a BETWEEN low AND high'.
      It is handled similar to the equivalent condition
      'a >= low AND a <= high':
    */
    if (cond_func->functype() == Item_func::BETWEEN)
    {
      Item_field *field_item;
      bool equal_func= FALSE;
      uint num_values= 2;
      values= cond_func->arguments();

      // Comparison mode handed to Item::eq() for the 'low = high' test below
      bool binary_cmp= (values[0]->real_item()->type() == Item::FIELD_ITEM)
            ? ((Item_field*)values[0]->real_item())->field->binary()
            : TRUE;

      /*
        Additional optimization: If 'low = high':
        Handle as if the condition was "t.key = low".
      */
      if (!((Item_func_between*)cond_func)->negated &&
          values[1]->eq(values[2], binary_cmp))
      {
        equal_func= TRUE;
        num_values= 1;
      }

      /*
        Append keys for 'field <cmp> value[]' if the
        condition is of the form::
        '<field> BETWEEN value[1] AND value[2]'
      */
      if (is_local_field (values[0]))
      {
        field_item= (Item_field *) (values[0]->real_item());
        add_key_equal_fields(key_fields, *and_level, cond_func,
                             field_item, equal_func, &values[1],
                             num_values, usable_tables, sargables);
      }
      /*
        Append keys for 'value[0] <cmp> field' if the
        condition is of the form:
        'value[0] BETWEEN field1 AND field2'
      */
      for (uint i= 1; i <= num_values; i++)
      {
        if (is_local_field (values[i]))
        {
          field_item= (Item_field *) (values[i]->real_item());
          add_key_equal_fields(key_fields, *and_level, cond_func,
                               field_item, equal_func, values,
                               1, usable_tables, sargables);
        }
      }
    } // if ( ... Item_func::BETWEEN)

    // IN, NE
    else if (is_local_field (cond_func->key_item()) &&
            !(cond_func->used_tables() & OUTER_REF_TABLE_BIT))
    {
      values= cond_func->arguments()+1;
      /*
        For 'field1 <> field2' with a local field2 the value array is moved
        back by one so that it starts at the first argument.
      */
      if (cond_func->functype() == Item_func::NE_FUNC &&
        is_local_field (cond_func->arguments()[1]))
        values--;
      DBUG_ASSERT(cond_func->functype() != Item_func::IN_FUNC ||
                  cond_func->argument_count() != 2);
      // eq_func is passed as 0 (false) for these predicates
      add_key_equal_fields(key_fields, *and_level, cond_func,
                           (Item_field*) (cond_func->key_item()->real_item()),
                           0, values,
                           cond_func->argument_count()-1,
                           usable_tables, sargables);
    }
    break;
  }
  case Item_func::OPTIMIZE_OP:
  {
    bool equal_func=(cond_func->functype() == Item_func::EQ_FUNC ||
		     cond_func->functype() == Item_func::EQUAL_FUNC);

    // 'field <op> expr': the field is the first argument
    if (is_local_field (cond_func->arguments()[0]))
    {
      add_key_equal_fields(key_fields, *and_level, cond_func,
	                (Item_field*) (cond_func->arguments()[0])->real_item(),
		           equal_func,
                           cond_func->arguments()+1, 1, usable_tables,
                           sargables);
    }
    // 'expr <op> field': the field is the second argument (not for LIKE)
    if (is_local_field (cond_func->arguments()[1]) &&
	cond_func->functype() != Item_func::LIKE_FUNC)
    {
      add_key_equal_fields(key_fields, *and_level, cond_func,
                       (Item_field*) (cond_func->arguments()[1])->real_item(),
		           equal_func,
                           cond_func->arguments(),1,usable_tables,
                           sargables);
    }
    break;
  }
  case Item_func::OPTIMIZE_NULL:
    /* column_name IS [NOT] NULL */
    if (is_local_field (cond_func->arguments()[0]) &&
	!(cond_func->used_tables() & OUTER_REF_TABLE_BIT))
    {
      // A NULL item is created to serve as the comparison value
      Item *tmp=new Item_null;
      if (unlikely(!tmp))                       // Should never be true
	return;
      add_key_equal_fields(key_fields, *and_level, cond_func,
		    (Item_field*) (cond_func->arguments()[0])->real_item(),
		    cond_func->functype() == Item_func::ISNULL_FUNC,
			   &tmp, 1, usable_tables, sargables);
    }
    break;
  case Item_func::OPTIMIZE_EQUAL:
    // Multiple equality (Item_equal)
    Item_equal *item_equal= (Item_equal *) cond;
    Item *const_item= item_equal->get_const();
    Item_equal_iterator it(*item_equal);
    Item_field *item;
    if (const_item)
    {
      /*
        For each field field1 from item_equal consider the equality
        field1=const_item as a condition allowing an index access of the table
        with field1 by the keys value of field1.
      */
      while ((item= it++))
      {
        add_key_field(key_fields, *and_level, cond_func, item->field,
                      TRUE, &const_item, 1, usable_tables, sargables);
      }
    }
    else
    {
      /*
        Consider all pairs of different fields included into item_equal.
        For each of them (field1, field2) consider the equality
        field1=field2 as a condition allowing an index access of the table
        with field1 by the keys value of field2.
      */
      Item_equal_iterator fi(*item_equal);
      while ((item= fi++))
      {
        Field *field= item->field;
        // Inner pass over all members; 'it' is rewound after each pass
        while ((item= it++))
        {
          if (!field->eq(item->field))
          {
            add_key_field(key_fields, *and_level, cond_func, field,
                          TRUE, (Item **) &item, 1, usable_tables,
                          sargables);
          }
        }
        it.rewind();
      }
    }
    break;
  }
}
5371 
5372 
5373 /*
5374   Add all keys with uses 'field' for some keypart
5375   If field->and_level != and_level then only mark key_part as const_part
5376 
5377   RETURN
5378    0 - OK
5379    1 - Out of memory.
5380 */
5381 
5382 static bool
add_key_part(Key_use_array * keyuse_array,Key_field * key_field)5383 add_key_part(Key_use_array *keyuse_array, Key_field *key_field)
5384 {
5385   Field *field=key_field->field;
5386   TABLE *form= field->table;
5387 
5388   if (key_field->eq_func && !(key_field->optimize & KEY_OPTIMIZE_EXISTS))
5389   {
5390     for (uint key=0 ; key < form->s->keys ; key++)
5391     {
5392       if (!(form->keys_in_use_for_query.is_set(key)))
5393 	continue;
5394       if (form->key_info[key].flags & (HA_FULLTEXT | HA_SPATIAL))
5395 	continue;    // ToDo: ft-keys in non-ft queries.   SerG
5396 
5397       uint key_parts= actual_key_parts(&form->key_info[key]);
5398       for (uint part=0 ; part <  key_parts ; part++)
5399       {
5400 	if (field->eq(form->key_info[key].key_part[part].field))
5401 	{
5402           const Key_use keyuse(field->table,
5403                                key_field->val,
5404                                key_field->val->used_tables(),
5405                                key,
5406                                part,
5407                                key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL,
5408                                (key_part_map) 1 << part,
5409                                ~(ha_rows) 0, // will be set in optimize_keyuse
5410                                key_field->null_rejecting,
5411                                key_field->cond_guard,
5412                                key_field->sj_pred_no);
5413           if (keyuse_array->push_back(keyuse))
5414             return TRUE;
5415 	}
5416       }
5417     }
5418   }
5419   return FALSE;
5420 }
5421 
5422 
5423 static bool
add_ft_keys(Key_use_array * keyuse_array,JOIN_TAB * stat,Item * cond,table_map usable_tables)5424 add_ft_keys(Key_use_array *keyuse_array,
5425             JOIN_TAB *stat,Item *cond,table_map usable_tables)
5426 {
5427   Item_func_match *cond_func=NULL;
5428 
5429   if (!cond)
5430     return FALSE;
5431 
5432   if (cond->type() == Item::FUNC_ITEM)
5433   {
5434     Item_func *func=(Item_func *)cond;
5435     Item_func::Functype functype=  func->functype();
5436     if (functype == Item_func::FT_FUNC)
5437       cond_func=(Item_func_match *)cond;
5438     else if (func->arg_count == 2)
5439     {
5440       Item *arg0=(Item *)(func->arguments()[0]),
5441            *arg1=(Item *)(func->arguments()[1]);
5442       if (arg1->const_item() && arg1->cols() == 1 &&
5443            arg0->type() == Item::FUNC_ITEM &&
5444            ((Item_func *) arg0)->functype() == Item_func::FT_FUNC &&
5445           ((functype == Item_func::GE_FUNC && arg1->val_real() > 0) ||
5446            (functype == Item_func::GT_FUNC && arg1->val_real() >=0)))
5447         cond_func= (Item_func_match *) arg0;
5448       else if (arg0->const_item() &&
5449                 arg1->type() == Item::FUNC_ITEM &&
5450                 ((Item_func *) arg1)->functype() == Item_func::FT_FUNC &&
5451                ((functype == Item_func::LE_FUNC && arg0->val_real() > 0) ||
5452                 (functype == Item_func::LT_FUNC && arg0->val_real() >=0)))
5453         cond_func= (Item_func_match *) arg1;
5454     }
5455   }
5456   else if (cond->type() == Item::COND_ITEM)
5457   {
5458     List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
5459 
5460     if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
5461     {
5462       Item *item;
5463       while ((item=li++))
5464       {
5465         if (add_ft_keys(keyuse_array,stat,item,usable_tables))
5466           return TRUE;
5467       }
5468     }
5469   }
5470 
5471   if (!cond_func || cond_func->key == NO_SUCH_KEY ||
5472       !(usable_tables & cond_func->table->map))
5473     return FALSE;
5474 
5475   const Key_use keyuse(cond_func->table,
5476                        cond_func,
5477                        cond_func->key_item()->used_tables(),
5478                        cond_func->key,
5479                        FT_KEYPART,
5480                        0,             // optimize
5481                        0,             // keypart_map
5482                        ~(ha_rows)0,   // ref_table_rows
5483                        false,         // null_rejecting
5484                        NULL,          // cond_guard
5485                        UINT_MAX);     // sj_pred_no
5486   return keyuse_array->push_back(keyuse);
5487 }
5488 
5489 
sort_keyuse(Key_use * a,Key_use * b)5490 static int sort_keyuse(Key_use *a, Key_use *b)
5491 {
5492   int res;
5493   if (a->table->tablenr != b->table->tablenr)
5494     return (int) (a->table->tablenr - b->table->tablenr);
5495   if (a->key != b->key)
5496     return (int) (a->key - b->key);
5497   if (a->keypart != b->keypart)
5498     return (int) (a->keypart - b->keypart);
5499   // Place const values before other ones
5500   if ((res= MY_TEST((a->used_tables & ~OUTER_REF_TABLE_BIT)) -
5501        MY_TEST((b->used_tables & ~OUTER_REF_TABLE_BIT))))
5502     return res;
5503   /* Place rows that are not 'OPTIMIZE_REF_OR_NULL' first */
5504   return (int) ((a->optimize & KEY_OPTIMIZE_REF_OR_NULL) -
5505 		(b->optimize & KEY_OPTIMIZE_REF_OR_NULL));
5506 }
5507 
5508 
5509 /*
5510   Add to Key_field array all 'ref' access candidates within nested join.
5511 
5512     This function populates Key_field array with entries generated from the
5513     ON condition of the given nested join, and does the same for nested joins
5514     contained within this nested join.
5515 
5516   @param[in]      nested_join_table   Nested join pseudo-table to process
5517   @param[in,out]  end                 End of the key field array
5518   @param[in,out]  and_level           And-level
5519   @param[in,out]  sargables           Array of found sargable candidates
5520 
5521 
5522   @note
5523     We can add accesses to the tables that are direct children of this nested
5524     join (1), and are not inner tables w.r.t their neighbours (2).
5525 
5526     Example for #1 (outer brackets pair denotes nested join this function is
5527     invoked for):
5528     @code
5529      ... LEFT JOIN (t1 LEFT JOIN (t2 ... ) ) ON cond
5530     @endcode
5531     Example for #2:
5532     @code
5533      ... LEFT JOIN (t1 LEFT JOIN t2 ) ON cond
5534     @endcode
5535     In examples 1-2 for condition cond, we can add 'ref' access candidates to
5536     t1 only.
5537     Example #3:
5538     @code
5539      ... LEFT JOIN (t1, t2 LEFT JOIN t3 ON inner_cond) ON cond
5540     @endcode
5541     Here we can add 'ref' access candidates for t1 and t2, but not for t3.
5542 */
5543 
add_key_fields_for_nj(JOIN * join,TABLE_LIST * nested_join_table,Key_field ** end,uint * and_level,SARGABLE_PARAM ** sargables)5544 static void add_key_fields_for_nj(JOIN *join, TABLE_LIST *nested_join_table,
5545                                   Key_field **end, uint *and_level,
5546                                   SARGABLE_PARAM **sargables)
5547 {
5548   List_iterator<TABLE_LIST> li(nested_join_table->nested_join->join_list);
5549   List_iterator<TABLE_LIST> li2(nested_join_table->nested_join->join_list);
5550   bool have_another = FALSE;
5551   table_map tables= 0;
5552   TABLE_LIST *table;
5553   DBUG_ASSERT(nested_join_table->nested_join);
5554 
5555   while ((table= li++) || (have_another && (li=li2, have_another=FALSE,
5556                                             (table= li++))))
5557   {
5558     if (table->nested_join)
5559     {
5560       if (!table->join_cond())
5561       {
5562         /* It's a semi-join nest. Walk into it as if it wasn't a nest */
5563         have_another= TRUE;
5564         li2= li;
5565         li= List_iterator<TABLE_LIST>(table->nested_join->join_list);
5566       }
5567       else
5568         add_key_fields_for_nj(join, table, end, and_level, sargables);
5569     }
5570     else
5571       if (!table->join_cond())
5572         tables |= table->table->map;
5573   }
5574   if (nested_join_table->join_cond())
5575     add_key_fields(join, end, and_level, nested_join_table->join_cond(), tables,
5576                    sargables);
5577 }
5578 
5579 
5580 /**
5581   Check for the presence of AGGFN(DISTINCT a) queries that may be subject
5582   to loose index scan.
5583 
5584 
5585   Check if the query is a subject to AGGFN(DISTINCT) using loose index scan
5586   (QUICK_GROUP_MIN_MAX_SELECT).
5587   Optionally (if out_args is supplied) will push the arguments of
5588   AGGFN(DISTINCT) to the list
5589 
5590   Check for every COUNT(DISTINCT), AVG(DISTINCT) or
5591   SUM(DISTINCT). These can be resolved by Loose Index Scan as long
5592   as all the aggregate distinct functions refer to the same
5593   fields. Thus:
5594 
5595   SELECT AGGFN(DISTINCT a, b), AGGFN(DISTINCT b, a)... => can use LIS
5596   SELECT AGGFN(DISTINCT a),    AGGFN(DISTINCT a)   ... => can use LIS
5597   SELECT AGGFN(DISTINCT a, b), AGGFN(DISTINCT a)   ... => cannot use LIS
5598   SELECT AGGFN(DISTINCT a),    AGGFN(DISTINCT b)   ... => cannot use LIS
5599   etc.
5600 
5601   @param      join       the join to check
5602   @param[out] out_args   Collect the arguments of the aggregate functions
5603                          to a list. We don't worry about duplicates as
5604                          these will be sorted out later in
5605                          get_best_group_min_max.
5606 
5607   @return                does the query qualify for indexed AGGFN(DISTINCT)
5608     @retval   true       it does
5609     @retval   false      AGGFN(DISTINCT) must apply distinct in it.
5610 */
5611 
5612 bool
is_indexed_agg_distinct(JOIN * join,List<Item_field> * out_args)5613 is_indexed_agg_distinct(JOIN *join, List<Item_field> *out_args)
5614 {
5615   Item_sum **sum_item_ptr;
5616   bool result= false;
5617   Field_map first_aggdistinct_fields;
5618 
5619   if (join->primary_tables > 1 ||             /* reference more than 1 table */
5620       join->select_distinct ||                /* or a DISTINCT */
5621       join->select_lex->olap == ROLLUP_TYPE)  /* Check (B3) for ROLLUP */
5622     return false;
5623 
5624   if (join->make_sum_func_list(join->all_fields, join->fields_list, true))
5625     return false;
5626 
5627   for (sum_item_ptr= join->sum_funcs; *sum_item_ptr; sum_item_ptr++)
5628   {
5629     Item_sum *sum_item= *sum_item_ptr;
5630     Field_map cur_aggdistinct_fields;
5631     Item *expr;
5632     /* aggregate is not AGGFN(DISTINCT) or more than 1 argument to it */
5633     switch (sum_item->sum_func())
5634     {
5635       case Item_sum::MIN_FUNC:
5636       case Item_sum::MAX_FUNC:
5637         continue;
5638       case Item_sum::COUNT_DISTINCT_FUNC:
5639         break;
5640       case Item_sum::AVG_DISTINCT_FUNC:
5641       case Item_sum::SUM_DISTINCT_FUNC:
5642         if (sum_item->get_arg_count() == 1)
5643           break;
5644         /* fall through */
5645       default: return false;
5646     }
5647 
5648     for (uint i= 0; i < sum_item->get_arg_count(); i++)
5649     {
5650       expr= sum_item->get_arg(i);
5651       /* The AGGFN(DISTINCT) arg is not an attribute? */
5652       if (expr->real_item()->type() != Item::FIELD_ITEM)
5653         return false;
5654 
5655       Item_field* item= static_cast<Item_field*>(expr->real_item());
5656       if (out_args)
5657         out_args->push_back(item);
5658 
5659       cur_aggdistinct_fields.set_bit(item->field->field_index);
5660       result= true;
5661     }
5662     /*
5663       If there are multiple aggregate functions, make sure that they all
5664       refer to exactly the same set of columns.
5665     */
5666     if (first_aggdistinct_fields.is_clear_all())
5667       first_aggdistinct_fields.merge(cur_aggdistinct_fields);
5668     else if (first_aggdistinct_fields != cur_aggdistinct_fields)
5669       return false;
5670   }
5671 
5672   return result;
5673 }
5674 
5675 
5676 /**
5677   Print keys that were appended to join_tab->const_keys because they
5678   can be used for GROUP BY or DISTINCT to the optimizer trace.
5679 
5680   @param trace     The optimizer trace context we're adding info to
5681   @param join_tab  The table the indices cover
5682   @param new_keys  The keys that are considered useful because they can
5683                    be used for GROUP BY or DISTINCT
5684   @param cause     Zero-terminated string with reason for adding indices
5685                    to const_keys
5686 
5687   @see add_group_and_distinct_keys()
5688  */
trace_indices_added_group_distinct(Opt_trace_context * trace,const JOIN_TAB * join_tab,const key_map new_keys,const char * cause)5689 static void trace_indices_added_group_distinct(Opt_trace_context *trace,
5690                                                const JOIN_TAB *join_tab,
5691                                                const key_map new_keys,
5692                                                const char* cause)
5693 {
5694 #ifdef OPTIMIZER_TRACE
5695   if (likely(!trace->is_started()))
5696     return;
5697 
5698   KEY *key_info= join_tab->table->key_info;
5699   key_map existing_keys= join_tab->const_keys;
5700   uint nbrkeys= join_tab->table->s->keys;
5701 
5702   Opt_trace_object trace_summary(trace, "const_keys_added");
5703   {
5704     Opt_trace_array trace_key(trace,"keys");
5705     for (uint j= 0 ; j < nbrkeys ; j++)
5706       if (new_keys.is_set(j) && !existing_keys.is_set(j))
5707         trace_key.add_utf8(key_info[j].name);
5708   }
5709   trace_summary.add_alnum("cause", cause);
5710 #endif
5711 }
5712 
5713 
5714 /**
5715   Discover the indexes that might be used for GROUP BY or DISTINCT queries.
5716 
5717   If the query has a GROUP BY clause, find all indexes that contain
5718   all GROUP BY fields, and add those indexes to join_tab->const_keys
5719   and join_tab->keys.
5720 
5721   If the query has a DISTINCT clause, find all indexes that contain
5722   all SELECT fields, and add those indexes to join_tab->const_keys and
5723   join_tab->keys. This allows later on such queries to be processed by
5724   a QUICK_GROUP_MIN_MAX_SELECT.
5725 
5726   Note that indexes that are not usable for resolving GROUP
5727   BY/DISTINCT may also be added in some corner cases. For example, an
5728   index covering 'a' and 'b' is not usable for the following query but
5729   is still added: "SELECT DISTINCT a+b FROM t1". This is not a big
5730   issue because a) although the optimizer will consider using the
5731   index, it will not chose it (so minor calculation cost added but not
5732   wrong result) and b) it applies only to corner cases.
5733 
5734   @param join
5735   @param join_tab
5736 
5737   @return
5738     None
5739 */
5740 
5741 static void
add_group_and_distinct_keys(JOIN * join,JOIN_TAB * join_tab)5742 add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab)
5743 {
5744   List<Item_field> indexed_fields;
5745   List_iterator<Item_field> indexed_fields_it(indexed_fields);
5746   ORDER      *cur_group;
5747   Item_field *cur_item;
5748   const char *cause;
5749 
5750   if (join->group_list)
5751   { /* Collect all query fields referenced in the GROUP clause. */
5752     for (cur_group= join->group_list; cur_group; cur_group= cur_group->next)
5753       (*cur_group->item)->walk(&Item::collect_item_field_processor, 0,
5754                                (uchar*) &indexed_fields);
5755     cause= "group_by";
5756   }
5757   else if (join->select_distinct)
5758   { /* Collect all query fields referenced in the SELECT clause. */
5759     List<Item> &select_items= join->fields_list;
5760     List_iterator<Item> select_items_it(select_items);
5761     Item *item;
5762     while ((item= select_items_it++))
5763       item->walk(&Item::collect_item_field_processor, 0,
5764                  (uchar*) &indexed_fields);
5765     cause= "distinct";
5766   }
5767   else if (join->tmp_table_param.sum_func_count &&
5768            is_indexed_agg_distinct(join, &indexed_fields))
5769   {
5770     /*
5771       SELECT list with AGGFN(distinct col). The query qualifies for
5772       loose index scan, and is_indexed_agg_distinct() has already
5773       collected all referenced fields into indexed_fields.
5774     */
5775     join->sort_and_group= 1;
5776     cause= "indexed_distinct_aggregate";
5777   }
5778   else
5779     return;
5780 
5781   if (indexed_fields.elements == 0)
5782     return;
5783 
5784   key_map possible_keys;
5785   possible_keys.set_all();
5786 
5787   /* Intersect the keys of all group fields. */
5788   while ((cur_item= indexed_fields_it++))
5789   {
5790     if (cur_item->used_tables() != join_tab->table->map)
5791     {
5792       /*
5793         Doing GROUP BY or DISTINCT on a field in another table so no
5794         index in this table is usable
5795       */
5796       return;
5797     }
5798     else
5799       possible_keys.intersect(cur_item->field->part_of_key);
5800   }
5801 
5802   /*
5803     At this point, possible_keys has key bits set only for usable
5804     indexes because indexed_fields is non-empty and if any of the
5805     fields belong to a different table the function would exit in the
5806     loop above.
5807   */
5808 
5809   if (!possible_keys.is_clear_all() &&
5810       !possible_keys.is_subset(join_tab->const_keys))
5811   {
5812     trace_indices_added_group_distinct(&join->thd->opt_trace, join_tab,
5813                                        possible_keys, cause);
5814     join_tab->const_keys.merge(possible_keys);
5815     join_tab->keys.merge(possible_keys);
5816   }
5817 
5818 }
5819 
/**
  Update keyuse array with all possible keys we can use to fetch rows.

  The function collects Key_field candidates from the WHERE condition,
  from the ON expressions attached to the tables and from the ON
  conditions of nested joins, converts them to Key_use elements, adds
  full-text keys if the query block has full-text functions, and finally
  sorts the array and removes the key parts that cannot be used.

  @param       thd            Thread handler; its allocator provides the
                              work buffer
  @param[out]  keyuse         Put here ordered array of Key_use structures
  @param       join_tab       Array in tablenr_order
  @param       tables         Number of tables in join
  @param       cond           WHERE condition (note that the function analyzes
                              join_tab[i]->join_cond() too)
  @param       cond_equal     Not referenced in the body of this function
  @param       normal_tables  Tables not inner w.r.t some outer join (ones
                              for which we can make ref access based the WHERE
                              clause)
  @param       select_lex     current SELECT
  @param[out]  sargables      Array of found sargable candidates

   @retval
     false  OK
   @retval
     true   Out of memory, or generate_derived_keys(), add_key_part(),
            add_ft_keys() or keyuse->push_back() reported a failure.
*/

static bool
update_ref_and_keys(THD *thd, Key_use_array *keyuse,JOIN_TAB *join_tab,
                    uint tables, Item *cond, COND_EQUAL *cond_equal,
                    table_map normal_tables, SELECT_LEX *select_lex,
                    SARGABLE_PARAM **sargables)
{
  uint	and_level,i,found_eq_constant;
  Key_field *key_fields, *end, *field;
  uint sz;
  /* Multiplier for multiple equalities; at least 1 */
  uint m= max(select_lex->max_equal_elems, 1U);

  /*
    We use the same piece of memory to store both Key_field
    and SARGABLE_PARAM structures.
    Key_field values are placed at the beginning of this memory
    while SARGABLE_PARAM values are put at the end.
    All predicates that are used to fill arrays of Key_field
    and SARGABLE_PARAM structures have at most 2 arguments
    except BETWEEN predicates that have 3 arguments and
    IN predicates.
    Thus any predicate, if it's not BETWEEN/IN, can be used
    directly to fill at most 2 array elements, either of Key_field
    or SARGABLE_PARAM type. For a BETWEEN predicate 3 elements
    can be filled as this predicate is considered as
    sargable with respect to each of its arguments.
    An IN predicate can require at most 1 element as currently
    it is considered as sargable only for its first argument.
    Multiple equality can add elements that are filled after
    substitution of field arguments by equal fields. There
    can be not more than select_lex->max_equal_elems such
    substitutions.
  */
  sz= max(sizeof(Key_field), sizeof(SARGABLE_PARAM)) *
      (((select_lex->cond_count + 1) * 2 +
	select_lex->between_count) * m + 1);
  if (!(key_fields=(Key_field*)	thd->alloc(sz)))
    return TRUE; /* purecov: inspected */
  and_level= 0;
  /* 'field' marks the start and 'end' the current end of the Key_field array */
  field= end= key_fields;
  /* Let *sargables point at the last SARGABLE_PARAM slot of the buffer */
  *sargables= (SARGABLE_PARAM *) key_fields +
                (sz - sizeof((*sargables)[0].field))/sizeof(SARGABLE_PARAM);
  /* set a barrier for the array of SARGABLE_PARAM */
  (*sargables)[0].field= 0;

  if (cond)
  {
    /* Collect key fields from the WHERE condition */
    add_key_fields(join_tab->join, &end, &and_level, cond, normal_tables,
                   sargables);
    for (Key_field *fld= field; fld != end ; fld++)
    {
      /* Mark that we can optimize LEFT JOIN */
      if (fld->val->type() == Item::NULL_ITEM &&
          !fld->field->real_maybe_null())
      {
        /*
          Example:
          SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.a WHERE t2.a IS NULL;
          this just wants rows of t1 where t1.a does not exist in t2.
        */
        fld->field->table->reginfo.not_exists_optimize=1;
      }
    }
  }

  for (i=0 ; i < tables ; i++)
  {
    /*
      Block the creation of keys for inner tables of outer joins.
      Here only the outer joins that can not be converted to
      inner joins are left and all nests that can be eliminated
      are flattened.
      In the future when we introduce conditional accesses
      for inner tables in outer joins these keys will be taken
      into account as well.

      Note that only the map of the table itself is passed as the table
      set when the ON expression attached to the table is analyzed.
    */
    if (*join_tab[i].on_expr_ref)
      add_key_fields(join_tab->join, &end, &and_level,
                     *join_tab[i].on_expr_ref,
                     join_tab[i].table->map, sargables);
  }

  /* Process ON conditions for the nested joins */
  {
    List_iterator<TABLE_LIST> li(*join_tab->join->join_list);
    TABLE_LIST *table;
    while ((table= li++))
    {
      if (table->nested_join)
        add_key_fields_for_nj(join_tab->join, table, &end, &and_level,
                              sargables);
    }
  }

  /* Generate keys descriptions for derived tables */
  if (select_lex->materialized_table_count)
  {
    if (select_lex->join->generate_derived_keys())
      return true;
  }
  /* fill keyuse with found key parts */
  for ( ; field != end ; field++)
  {
    if (add_key_part(keyuse,field))
      return true;
  }

  /* Add full-text keys if the query block has full-text functions */
  if (select_lex->ftfunc_list->elements)
  {
    if (add_ft_keys(keyuse,join_tab,cond,normal_tables))
      return true;
  }

  /*
    Sort the array of possible keys and remove the following key parts:
    - ref if there is a keypart which is a ref and a const.
      (e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d,
      then we skip the key part corresponding to b=t2.d)
    - keyparts without previous keyparts
      (e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is
      used in the query, we drop the partial key parts from consideration).
    Special treatment for ft-keys.
  */
  if (!keyuse->empty())
  {
    Key_use *save_pos, *use;

    my_qsort(keyuse->begin(), keyuse->size(), keyuse->element_size(),
             reinterpret_cast<qsort_cmp>(sort_keyuse));

    /* Sentinel element with NULL table */
    const Key_use key_end(NULL, NULL, 0, 0, 0, 0, 0, 0, false, NULL, 0);
    if (keyuse->push_back(key_end)) // added for easy testing
      return TRUE;

    /*
      Compact the array in place: 'use' reads every element, 'save_pos'
      is where the next kept element is written. 'prev' is the last kept
      element; it starts as the sentinel so that the first element never
      matches it.
    */
    use= save_pos= keyuse->begin();
    const Key_use *prev= &key_end;
    found_eq_constant=0;
    /* size()-1: the sentinel appended above is not examined */
    for (i=0 ; i < keyuse->size()-1 ; i++,use++)
    {
      /* Key part compared with a constant: remember it as a const key part */
      if (!use->used_tables && use->optimize != KEY_OPTIMIZE_REF_OR_NULL)
	use->table->const_key_parts[use->key]|= use->keypart_map;
      if (use->keypart != FT_KEYPART)
      {
	if (use->key == prev->key && use->table == prev->table)
	{
	  /*
	    Same key as the last kept element: drop this one if it leaves
	    a gap in the key parts, or if it repeats a key part for which
	    an equality with a constant was already kept.
	  */
	  if (prev->keypart+1 < use->keypart ||
	      (prev->keypart == use->keypart && found_eq_constant))
	    continue;				/* remove */
	}
	else if (use->keypart != 0)		// First found must be 0
	  continue;
      }

#if defined(__GNUC__) && !MY_GNUC_PREREQ(4,4)
      /*
        Old gcc used a memcpy(), which is undefined if save_pos==use:
        http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19410
        http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39480
      */
      if (save_pos != use)
#endif
        *save_pos= *use;
      prev=use;
      found_eq_constant= !use->used_tables;
      /* Save ptr to first use */
      if (!use->table->reginfo.join_tab->keyuse)
	use->table->reginfo.join_tab->keyuse=save_pos;
      use->table->reginfo.join_tab->checked_keys.set_bit(use->key);
      save_pos++;
    }
    /*
      i is now the number of kept elements. The sentinel is stored right
      after them; chop(i) presumably truncates the array to i elements so
      that the sentinel lies just past its end - confirm against the
      definition of Key_use_array::chop().
    */
    i= (uint) (save_pos - keyuse->begin());
    keyuse->at(i) = key_end;
    keyuse->chop(i);
  }
  print_keyuse_array(&thd->opt_trace, keyuse);

  return false;
}
6018 
6019 
6020 /**
6021   Create a keyuse array for a table with a primary key.
6022   To be used when creating a materialized temporary table.
6023 
6024   @param thd         THD pointer, for memory allocation
6025   @param table       Table object representing table
6026   @param keyparts    Number of key parts in the primary key
6027   @param outer_exprs List of items used for key lookup
6028 
6029   @return Pointer to created keyuse array, or NULL if error
6030 */
create_keyuse_for_table(THD * thd,TABLE * table,uint keyparts,Item_field ** fields,List<Item> outer_exprs)6031 Key_use_array *create_keyuse_for_table(THD *thd, TABLE *table, uint keyparts,
6032                                        Item_field **fields,
6033                                        List<Item> outer_exprs)
6034 {
6035   void *mem= thd->alloc(sizeof(Key_use_array));
6036   if (!mem)
6037     return NULL;
6038   Key_use_array *keyuses= new (mem) Key_use_array(thd->mem_root);
6039 
6040   List_iterator<Item> outer_expr(outer_exprs);
6041 
6042   for (uint keypartno= 0; keypartno < keyparts; keypartno++)
6043   {
6044     Item *const item= outer_expr++;
6045     Key_field key_field(fields[keypartno]->field, item, 0, 0, true,
6046                         // null_rejecting must be true for field items only,
6047                         // add_not_null_conds() is incapable of handling
6048                         // other item types.
6049                         (item->type() == Item::FIELD_ITEM),
6050                         NULL, UINT_MAX);
6051     if (add_key_part(keyuses, &key_field))
6052       return NULL;
6053   }
6054   const Key_use key_end(NULL, NULL, 0, 0, 0, 0, 0, 0, false, NULL, 0);
6055   if (keyuses->push_back(key_end)) // added for easy testing
6056     return NULL;
6057 
6058   return keyuses;
6059 }
6060 
6061 
6062 /** Save const tables first as used tables. */
6063 
6064 static void
set_position(JOIN * join,uint idx,JOIN_TAB * table,Key_use * key)6065 set_position(JOIN *join, uint idx, JOIN_TAB *table, Key_use *key)
6066 {
6067   join->positions[idx].table= table;
6068   join->positions[idx].key=key;
6069   join->positions[idx].records_read=1.0;	/* This is a const table */
6070   join->positions[idx].ref_depend_map= 0;
6071 
6072   join->positions[idx].loosescan_key= MAX_KEY; /* Not a LooseScan */
6073   join->positions[idx].sj_strategy= SJ_OPT_NONE;
6074   join->positions[idx].use_join_buffer= FALSE;
6075 
6076   /* Move the const table as down as possible in best_ref */
6077   JOIN_TAB **pos=join->best_ref+idx+1;
6078   JOIN_TAB *next=join->best_ref[idx];
6079   for (;next != table ; pos++)
6080   {
6081     JOIN_TAB *tmp=pos[0];
6082     pos[0]=next;
6083     next=tmp;
6084   }
6085   join->best_ref[idx]=table;
6086 }
6087 
6088 
6089 /**
6090   Fill in outer join related info for the execution plan structure.
6091 
6092     For each outer join operation left after simplification of the
6093     original query the function set up the following pointers in the linear
6094     structure join->join_tab representing the selected execution plan.
6095     The first inner table t0 for the operation is set to refer to the last
6096     inner table tk through the field t0->last_inner.
6097     Any inner table ti for the operation are set to refer to the first
6098     inner table ti->first_inner.
6099     The first inner table t0 for the operation is set to refer to the
6100     first inner table of the embedding outer join operation, if there is any,
6101     through the field t0->first_upper.
6102     The on expression for the outer join operation is attached to the
6103     corresponding first inner table through the field t0->on_expr_ref.
6104     Here ti are structures of the JOIN_TAB type.
6105 
6106   EXAMPLE. For the query:
6107   @code
6108         SELECT * FROM t1
6109                       LEFT JOIN
6110                       (t2, t3 LEFT JOIN t4 ON t3.a=t4.a)
6111                       ON (t1.a=t2.a AND t1.b=t3.b)
6112           WHERE t1.c > 5,
6113   @endcode
6114 
6115     given the execution plan with the table order t1,t2,t3,t4
6116     is selected, the following references will be set;
6117     t4->last_inner=[t4], t4->first_inner=[t4], t4->first_upper=[t2]
6118     t2->last_inner=[t4], t2->first_inner=t3->first_inner=[t2],
6119     on expression (t1.a=t2.a AND t1.b=t3.b) will be attached to
6120     *t2->on_expr_ref, while t3.a=t4.a will be attached to *t4->on_expr_ref.
6121 
6122   @param join   reference to the info fully describing the query
6123 
6124   @note
6125     The function assumes that the simplification procedure has been
6126     already applied to the join query (see simplify_joins).
6127     This function can be called only after the execution plan
6128     has been chosen.
6129 */
6130 
6131 static void
make_outerjoin_info(JOIN * join)6132 make_outerjoin_info(JOIN *join)
6133 {
6134   DBUG_ENTER("make_outerjoin_info");
6135 
6136   DBUG_ASSERT(join->outer_join);
6137 
6138   for (uint i= join->const_tables; i < join->tables; i++)
6139   {
6140     JOIN_TAB   *const tab= join->join_tab + i;
6141     TABLE      *const table= tab->table;
6142 
6143     if (!table)
6144       continue;
6145 
6146     TABLE_LIST *const tbl= table->pos_in_table_list;
6147 
6148     if (tbl->outer_join)
6149     {
6150       /*
6151         Table tab is the only one inner table for outer join.
6152         (Like table t4 for the table reference t3 LEFT JOIN t4 ON t3.a=t4.a
6153         is in the query above.)
6154       */
6155       tab->last_inner= tab->first_inner= tab;
6156       tab->on_expr_ref= tbl->join_cond_ref();
6157       tab->cond_equal= tbl->cond_equal;
6158       /*
6159         If this outer join nest is embedded in another join nest,
6160         link the join-tabs:
6161       */
6162       TABLE_LIST *const outer_join_nest= tbl->outer_join_nest();
6163       if (outer_join_nest)
6164         tab->first_upper= outer_join_nest->nested_join->first_nested;
6165     }
6166     for (TABLE_LIST *embedding= tbl->embedding;
6167          embedding;
6168          embedding= embedding->embedding)
6169     {
6170       // Ignore join nests that are not outer join nests:
6171       if (!embedding->join_cond())
6172         continue;
6173       NESTED_JOIN *const nested_join= embedding->nested_join;
6174       if (!nested_join->nj_counter)
6175       {
6176         /*
6177           Table tab is the first inner table for nested_join.
6178           Save reference to it in the nested join structure.
6179         */
6180         nested_join->first_nested= tab;
6181         tab->on_expr_ref= embedding->join_cond_ref();
6182         tab->cond_equal= tbl->cond_equal;
6183 
6184         TABLE_LIST *const outer_join_nest= embedding->outer_join_nest();
6185         if (outer_join_nest)
6186           tab->first_upper= outer_join_nest->nested_join->first_nested;
6187       }
6188       if (!tab->first_inner)
6189         tab->first_inner= nested_join->first_nested;
6190       if (++nested_join->nj_counter < nested_join->nj_total)
6191         break;
6192       /* Table tab is the last inner table for nested join. */
6193       nested_join->first_nested->last_inner= tab;
6194     }
6195   }
6196   DBUG_VOID_RETURN;
6197 }
6198 
6199 /**
6200   Build a predicate guarded by match variables for embedding outer joins.
6201   The function recursively adds guards for predicate cond
6202   assending from tab to the first inner table  next embedding
6203   nested outer join and so on until it reaches root_tab
6204   (root_tab can be 0).
6205 
6206   @param tab       the first inner table for most nested outer join
6207   @param cond      the predicate to be guarded (must be set)
6208   @param root_tab  the first inner table to stop
6209 
6210   @return
6211     -  pointer to the guarded predicate, if success
6212     -  0, otherwise
6213 */
6214 
6215 static Item*
add_found_match_trig_cond(JOIN_TAB * tab,Item * cond,JOIN_TAB * root_tab)6216 add_found_match_trig_cond(JOIN_TAB *tab, Item *cond, JOIN_TAB *root_tab)
6217 {
6218   Item *tmp;
6219   DBUG_ASSERT(cond != 0);
6220   if (tab == root_tab)
6221     return cond;
6222   if ((tmp= add_found_match_trig_cond(tab->first_upper, cond, root_tab)))
6223     tmp= new Item_func_trig_cond(tmp, &tab->found, tab,
6224                                  Item_func_trig_cond::FOUND_MATCH);
6225   if (tmp)
6226   {
6227     tmp->quick_fix_field();
6228     tmp->update_used_tables();
6229   }
6230   return tmp;
6231 }
6232 
6233 
6234 /**
6235    Local helper function for make_join_select().
6236 
6237    Push down conditions from all on expressions.
6238    Each of these conditions are guarded by a variable
6239    that turns if off just before null complemented row for
6240    outer joins is formed. Thus, the condition from an
6241    'on expression' are guaranteed not to be checked for
6242    the null complemented row.
6243 */
pushdown_on_conditions(JOIN * join,JOIN_TAB * last_tab)6244 static bool pushdown_on_conditions(JOIN* join, JOIN_TAB *last_tab)
6245 {
6246   DBUG_ENTER("pushdown_on_conditions");
6247 
6248   /* First push down constant conditions from on expressions */
6249   for (JOIN_TAB *join_tab= join->join_tab+join->const_tables;
6250        join_tab < join->join_tab+join->tables ; join_tab++)
6251   {
6252     if (join_tab->on_expr_ref && *join_tab->on_expr_ref)
6253     {
6254       JOIN_TAB *cond_tab= join_tab->first_inner;
6255       Item *tmp_cond= make_cond_for_table(*join_tab->on_expr_ref,
6256                                           join->const_table_map,
6257                                           (table_map) 0, 0);
6258       if (!tmp_cond)
6259         continue;
6260       tmp_cond= new
6261         Item_func_trig_cond(tmp_cond, &cond_tab->not_null_compl, cond_tab,
6262                             Item_func_trig_cond::IS_NOT_NULL_COMPL);
6263       if (!tmp_cond)
6264         DBUG_RETURN(true);
6265       tmp_cond->quick_fix_field();
6266 
6267       if (cond_tab->and_with_jt_and_sel_condition(tmp_cond, __LINE__))
6268         DBUG_RETURN(true);
6269     }
6270   }
6271 
6272   JOIN_TAB *first_inner_tab= last_tab->first_inner;
6273 
6274   /* Push down non-constant conditions from on expressions */
6275   while (first_inner_tab && first_inner_tab->last_inner == last_tab)
6276   {
6277     /*
6278        Table last_tab is the last inner table of an outer join.
6279        An on expression is always attached to it.
6280     */
6281     Item *on_expr= *first_inner_tab->on_expr_ref;
6282 
6283     for (JOIN_TAB *join_tab= join->join_tab+join->const_tables;
6284          join_tab <= last_tab ; join_tab++)
6285     {
6286       table_map prefix_tables= join_tab->prefix_tables();
6287       table_map added_tables= join_tab->added_tables();
6288 
6289       if (join_tab == last_tab)
6290       {
6291         /*
6292           Need RAND_TABLE_BIT on the last inner table, in case there is a
6293           non-deterministic function in the join condition.
6294           (RAND_TABLE_BIT is set for the last table of the join plan,
6295            but this is not sufficient for join conditions, which may have a
6296            last inner table that is ahead of the last table of the join plan).
6297         */
6298         prefix_tables|= RAND_TABLE_BIT;
6299         added_tables|= RAND_TABLE_BIT;
6300       }
6301       Item *tmp_cond= make_cond_for_table(on_expr, prefix_tables, added_tables,
6302                                           false);
6303       if (!tmp_cond)
6304         continue;
6305 
6306       JOIN_TAB *cond_tab=
6307         join_tab < first_inner_tab ? first_inner_tab : join_tab;
6308       /*
6309         First add the guards for match variables of
6310         all embedding outer join operations.
6311       */
6312       if (!(tmp_cond= add_found_match_trig_cond(cond_tab->first_inner,
6313                                                 tmp_cond,
6314                                                 first_inner_tab)))
6315         DBUG_RETURN(1);
6316       /*
6317          Now add the guard turning the predicate off for
6318          the null complemented row.
6319       */
6320       tmp_cond=
6321         new Item_func_trig_cond(tmp_cond, &first_inner_tab->not_null_compl,
6322                                 first_inner_tab,
6323                                 Item_func_trig_cond::IS_NOT_NULL_COMPL);
6324       if (!tmp_cond)
6325         DBUG_RETURN(true);
6326       tmp_cond->quick_fix_field();
6327 
6328       /* Add the predicate to other pushed down predicates */
6329       if (cond_tab->and_with_jt_and_sel_condition(tmp_cond, __LINE__))
6330         DBUG_RETURN(true);
6331     }
6332     first_inner_tab= first_inner_tab->first_upper;
6333   }
6334   DBUG_RETURN(0);
6335 }
6336 
6337 
/*****************************************************************************
  Remove calculation with tables that aren't yet read. Also remove tests
  against fields that are read through a key where the table is not an
  outer join table.
  We can't remove tests that are made against columns which are stored
  in sorted order.
*****************************************************************************/
6345 
6346 static Item *
part_of_refkey(TABLE * table,Field * field)6347 part_of_refkey(TABLE *table,Field *field)
6348 {
6349   if (!table->reginfo.join_tab)
6350     return NULL;                  // field from outer non-select (UPDATE,...)
6351 
6352   uint ref_parts=table->reginfo.join_tab->ref.key_parts;
6353   if (ref_parts)
6354   {
6355     if (table->reginfo.join_tab->has_guarded_conds())
6356       return NULL;
6357 
6358     const KEY_PART_INFO *key_part=
6359       table->key_info[table->reginfo.join_tab->ref.key].key_part;
6360 
6361     for (uint part=0 ; part < ref_parts ; part++,key_part++)
6362       if (field->eq(key_part->field) &&
6363 	  !(key_part->key_part_flag & HA_PART_KEY_SEG))
6364 	return table->reginfo.join_tab->ref.items[part];
6365   }
6366   return NULL;
6367 }
6368 
6369 
6370 /**
6371   @return
6372     1 if right_item is used removable reference key on left_item
6373 
6374   @note see comments in make_cond_for_table_from_pred() about careful
6375   usage/modifications of test_if_ref().
6376 */
6377 
test_if_ref(Item * root_cond,Item_field * left_item,Item * right_item)6378 static bool test_if_ref(Item *root_cond,
6379                         Item_field *left_item,Item *right_item)
6380 {
6381   Field *field=left_item->field;
6382   JOIN_TAB *join_tab= field->table->reginfo.join_tab;
6383   // No need to change const test
6384   if (!field->table->const_table && join_tab &&
6385       (!join_tab->first_inner ||
6386        *join_tab->first_inner->on_expr_ref == root_cond) &&
6387       /* "ref_or_null" implements "x=y or x is null", not "x=y" */
6388       (join_tab->type != JT_REF_OR_NULL))
6389   {
6390     Item *ref_item=part_of_refkey(field->table,field);
6391     if (ref_item && ref_item->eq(right_item,1))
6392     {
6393       right_item= right_item->real_item();
6394       if (right_item->type() == Item::FIELD_ITEM)
6395 	return (field->eq_def(((Item_field *) right_item)->field));
6396       /* remove equalities injected by IN->EXISTS transformation */
6397       else if (right_item->type() == Item::CACHE_ITEM)
6398         return ((Item_cache *)right_item)->eq_def (field);
6399       if (right_item->const_item() && !(right_item->is_null()))
6400       {
6401 	/*
6402 	  We can remove binary fields and numerical fields except float,
6403 	  as float comparison isn't 100 % secure
6404 	  We have to keep normal strings to be able to check for end spaces
6405 
6406           sergefp: the above seems to be too restrictive. Counterexample:
6407             create table t100 (v varchar(10), key(v)) default charset=latin1;
6408             insert into t100 values ('a'),('a ');
6409             explain select * from t100 where v='a';
6410           The EXPLAIN shows 'using Where'. Running the query returns both
6411           rows, so it seems there are no problems with endspace in the most
6412           frequent case?
6413 	*/
6414 	if (field->binary() &&
6415 	    field->real_type() != MYSQL_TYPE_STRING &&
6416 	    field->real_type() != MYSQL_TYPE_VARCHAR &&
6417 	    (field->type() != MYSQL_TYPE_FLOAT || field->decimals() == 0))
6418 	{
6419 	  return !right_item->save_in_field_no_warnings(field, true);
6420 	}
6421       }
6422     }
6423   }
6424   return 0;					// keep test
6425 }
6426 
6427 /**
6428    Extract a condition that can be checked after reading given table
6429 
6430    @param cond       Condition to analyze
6431    @param tables     Tables for which "current field values" are available
6432    @param used_table Table that we're extracting the condition for (may
6433                      also include PSEUDO_TABLE_BITS, and may be zero)
6434    @param exclude_expensive_cond  Do not push expensive conditions
6435 
6436    @retval <>NULL Generated condition
6437    @retval =NULL  Already checked, OR error
6438 
6439    @details
6440      Extract the condition that can be checked after reading the table
6441      specified in 'used_table', given that current-field values for tables
6442      specified in 'tables' bitmap are available.
6443      If 'used_table' is 0
6444      - extract conditions for all tables in 'tables'.
6445      - extract conditions that are unrelated to any tables
6446        in the same query block/level (i.e. conditions
6447        which have used_tables == 0).
6448 
6449      The function assumes that
6450      - Constant parts of the condition have already been checked.
6451      - Condition that could be checked for tables in 'tables' has already
6452      been checked.
6453 
6454      The function takes into account that some parts of the condition are
6455      guaranteed to be true by employed 'ref' access methods (the code that
6456      does this is located at the end, search down for "EQ_FUNC").
6457 
6458    @note
6459      make_cond_for_info_schema() uses an algorithm similar to
6460      make_cond_for_table().
6461 */
6462 
6463 /**
6464    Destructively replaces a sub-condition inside a condition tree. The
6465    parse tree is also altered.
6466 
6467    @note Because of current requirements for semijoin flattening, we do not
6468    need to recurse here, hence this function will only examine the top-level
6469    AND conditions. (see JOIN::prepare, comment starting with "Check if the
6470    subquery predicate can be executed via materialization".)
6471 
6472    @param join The top-level query.
6473 
6474    @param tree Must be the handle to the top level condition. This is needed
6475    when the top-level condition changes.
6476 
6477    @param old_cond The condition to be replaced.
6478 
6479    @param new_cond The condition to be substituted.
6480 
6481    @param do_fix_fields If true, Item::fix_fields(THD*, Item**) is called for
6482    the new condition.
6483 
6484    @return error status
6485 
6486    @retval true If there was an error.
6487    @retval false If successful.
6488 */
6489 
replace_subcondition(JOIN * join,Item ** tree,Item * old_cond,Item * new_cond,bool do_fix_fields)6490 static bool replace_subcondition(JOIN *join, Item **tree,
6491                                  Item *old_cond, Item *new_cond,
6492                                  bool do_fix_fields)
6493 {
6494   if (*tree == old_cond)
6495   {
6496     *tree= new_cond;
6497     if (do_fix_fields && new_cond->fix_fields(join->thd, tree))
6498       return TRUE;
6499     join->select_lex->where= *tree;
6500     return FALSE;
6501   }
6502   else if ((*tree)->type() == Item::COND_ITEM)
6503   {
6504     List_iterator<Item> li(*((Item_cond*)(*tree))->argument_list());
6505     Item *item;
6506     while ((item= li++))
6507     {
6508       if (item == old_cond)
6509       {
6510         li.replace(new_cond);
6511         if (do_fix_fields && new_cond->fix_fields(join->thd, li.ref()))
6512           return TRUE;
6513         return FALSE;
6514       }
6515     }
6516   }
6517   else
6518     // If we came here it means there were an error during prerequisites check.
6519     DBUG_ASSERT(FALSE);
6520 
6521   return TRUE;
6522 }
6523 
6524 
subq_sj_candidate_cmp(Item_exists_subselect * const * el1,Item_exists_subselect * const * el2)6525 static int subq_sj_candidate_cmp(Item_exists_subselect* const *el1,
6526                                  Item_exists_subselect* const *el2)
6527 {
6528   /*
6529     Remove this assert when we support semijoin on non-IN subqueries.
6530   */
6531   DBUG_ASSERT((*el1)->substype() == Item_subselect::IN_SUBS &&
6532               (*el2)->substype() == Item_subselect::IN_SUBS);
6533   return ((*el1)->sj_convert_priority < (*el2)->sj_convert_priority) ? 1 :
6534          ( ((*el1)->sj_convert_priority == (*el2)->sj_convert_priority)? 0 : -1);
6535 }
6536 
6537 
fix_list_after_tbl_changes(st_select_lex * parent_select,st_select_lex * removed_select,List<TABLE_LIST> * tlist)6538 static void fix_list_after_tbl_changes(st_select_lex *parent_select,
6539                                        st_select_lex *removed_select,
6540                                        List<TABLE_LIST> *tlist)
6541 {
6542   List_iterator<TABLE_LIST> it(*tlist);
6543   TABLE_LIST *table;
6544   while ((table= it++))
6545   {
6546     if (table->join_cond())
6547       table->join_cond()->fix_after_pullout(parent_select, removed_select);
6548     if (table->nested_join)
6549       fix_list_after_tbl_changes(parent_select, removed_select,
6550                                  &table->nested_join->join_list);
6551   }
6552 }
6553 
6554 
6555 /**
6556   Convert a subquery predicate into a TABLE_LIST semi-join nest
6557 
6558   @param parent_join Parent join, which has subq_pred in its WHERE/ON clause.
6559   @param subq_pred   Subquery predicate to be converted.
6560                      This is either an IN, =ANY or EXISTS predicate.
6561 
6562   @retval FALSE OK
6563   @retval TRUE  Error
6564 
6565   @details
6566 
6567   The following transformations are performed:
6568 
6569   1. IN/=ANY predicates on the form:
6570 
6571   SELECT ...
6572   FROM ot1 ... otN
6573   WHERE (oe1, ... oeM) IN (SELECT ie1, ..., ieM)
6574                            FROM it1 ... itK
6575                           [WHERE inner-cond])
6576    [AND outer-cond]
6577   [GROUP BY ...] [HAVING ...] [ORDER BY ...]
6578 
6579   are transformed into:
6580 
6581   SELECT ...
6582   FROM (ot1 ... otN) SJ (it1 ... itK)
6583                      ON (oe1, ... oeM) = (ie1, ..., ieM)
6584                         [AND inner-cond]
6585   [WHERE outer-cond]
6586   [GROUP BY ...] [HAVING ...] [ORDER BY ...]
6587 
6588   Notice that the inner-cond may contain correlated and non-correlated
6589   expressions. Further transformations will analyze and break up such
6590   expressions.
6591 
6592   Prepared Statements: the transformation is permanent:
6593    - Changes in TABLE_LIST structures are naturally permanent
6594    - Item tree changes are performed on statement MEM_ROOT:
6595       = we activate statement MEM_ROOT
6596       = this function is called before the first fix_prepare_information call.
6597 
6598   This is intended because the criteria for subquery-to-sj conversion remain
6599   constant for the lifetime of the Prepared Statement.
6600 */
6601 
convert_subquery_to_semijoin(JOIN * parent_join,Item_exists_subselect * subq_pred)6602 static bool convert_subquery_to_semijoin(JOIN *parent_join,
6603                                          Item_exists_subselect *subq_pred)
6604 {
6605   SELECT_LEX *parent_lex= parent_join->select_lex;
6606   TABLE_LIST *emb_tbl_nest= NULL;
6607   List<TABLE_LIST> *emb_join_list= &parent_lex->top_join_list;
6608   THD *thd= parent_join->thd;
6609   DBUG_ENTER("convert_subquery_to_semijoin");
6610 
6611   DBUG_ASSERT(subq_pred->substype() == Item_subselect::IN_SUBS);
6612 
6613   /*
6614     Find out where to insert the semi-join nest and the generated condition.
6615 
6616     For t1 LEFT JOIN t2, embedding_join_nest will be t2.
6617     Note that t2 may be a simple table or may itself be a join nest
6618     (e.g. in the case t1 LEFT JOIN (t2 JOIN t3))
6619   */
6620   if ((void*)subq_pred->embedding_join_nest != NULL)
6621   {
6622     if (subq_pred->embedding_join_nest->nested_join)
6623     {
6624       /*
6625         We're dealing with
6626 
6627           ... [LEFT] JOIN  ( ... ) ON (subquery AND condition) ...
6628 
6629         The sj-nest will be inserted into the brackets nest.
6630       */
6631       emb_tbl_nest=  subq_pred->embedding_join_nest;
6632       emb_join_list= &emb_tbl_nest->nested_join->join_list;
6633     }
6634     else if (!subq_pred->embedding_join_nest->outer_join)
6635     {
6636       /*
6637         We're dealing with
6638 
6639           ... INNER JOIN tblX ON (subquery AND condition) ...
6640 
6641         The sj-nest will be tblX's "sibling", i.e. another child of its
6642         parent. This is ok because tblX is joined as an inner join.
6643       */
6644       emb_tbl_nest= subq_pred->embedding_join_nest->embedding;
6645       if (emb_tbl_nest)
6646         emb_join_list= &emb_tbl_nest->nested_join->join_list;
6647     }
6648     else if (!subq_pred->embedding_join_nest->nested_join)
6649     {
6650       TABLE_LIST *outer_tbl= subq_pred->embedding_join_nest;
6651       /*
6652         We're dealing with
6653 
6654           ... LEFT JOIN tbl ON (on_expr AND subq_pred) ...
6655 
6656         we'll need to convert it into:
6657 
6658           ... LEFT JOIN ( tbl SJ (subq_tables) ) ON (on_expr AND subq_pred) ...
6659                         |                      |
6660                         |<----- wrap_nest ---->|
6661 
6662         Q:  other subqueries may be pointing to this element. What to do?
6663         A1: simple solution: copy *subq_pred->embedding_join_nest= *parent_nest.
6664             But we'll need to fix other pointers.
6665         A2: Another way: have TABLE_LIST::next_ptr so the following
6666             subqueries know the table has been nested.
6667         A3: changes in the TABLE_LIST::outer_join will make everything work
6668             automatically.
6669       */
6670       TABLE_LIST *const wrap_nest=
6671         TABLE_LIST::new_nested_join(thd->mem_root, "(sj-wrap)",
6672                                     outer_tbl->embedding, outer_tbl->join_list,
6673                                     parent_lex);
6674       if (wrap_nest == NULL)
6675         DBUG_RETURN(true);
6676 
6677       wrap_nest->nested_join->join_list.push_back(outer_tbl);
6678 
6679       outer_tbl->embedding= wrap_nest;
6680       outer_tbl->join_list= &wrap_nest->nested_join->join_list;
6681 
6682       /*
6683         wrap_nest will take place of outer_tbl, so move the outer join flag
6684         and join condition.
6685       */
6686       wrap_nest->outer_join= outer_tbl->outer_join;
6687       outer_tbl->outer_join= 0;
6688 
6689       wrap_nest->set_join_cond(outer_tbl->join_cond());
6690       outer_tbl->set_join_cond(NULL);
6691 
6692       List_iterator<TABLE_LIST> li(*wrap_nest->join_list);
6693       TABLE_LIST *tbl;
6694       while ((tbl= li++))
6695       {
6696         if (tbl == outer_tbl)
6697         {
6698           li.replace(wrap_nest);
6699           break;
6700         }
6701       }
6702 
6703       /*
6704         outer_tbl is replaced by wrap_nest.
6705         For subselects, update embedding_join_nest to point to wrap_nest
6706         instead of outer_tbl.
6707       */
6708       for (Item_exists_subselect **subquery= parent_join->sj_subselects.begin();
6709            subquery < parent_join->sj_subselects.end();
6710            subquery++)
6711       {
6712         if ((*subquery)->embedding_join_nest == outer_tbl)
6713           (*subquery)->embedding_join_nest= wrap_nest;
6714       }
6715 
6716       /*
6717         Ok now wrap_nest 'contains' outer_tbl and we're ready to add the
6718         semi-join nest into it
6719       */
6720       emb_join_list= &wrap_nest->nested_join->join_list;
6721       emb_tbl_nest=  wrap_nest;
6722     }
6723   }
6724 
6725   TABLE_LIST *const sj_nest=
6726     TABLE_LIST::new_nested_join(thd->mem_root, "(sj-nest)",
6727                                 emb_tbl_nest, emb_join_list, parent_lex);
6728   if (sj_nest == NULL)
6729     DBUG_RETURN(true);
6730 
6731   NESTED_JOIN *const nested_join= sj_nest->nested_join;
6732 
6733   /* Nests do not participate in those 'chains', so: */
6734   /* sj_nest->next_leaf= sj_nest->next_local= sj_nest->next_global == NULL*/
6735   emb_join_list->push_back(sj_nest);
6736 
6737   /*
6738     nested_join->used_tables and nested_join->not_null_tables are
6739     initialized in simplify_joins().
6740   */
6741 
6742   /*
6743     2. Walk through subquery's top list and set 'embedding' to point to the
6744        sj-nest.
6745   */
6746   st_select_lex *subq_lex= subq_pred->unit->first_select();
6747   nested_join->query_block_id= subq_lex->select_number;
6748   nested_join->join_list.empty();
6749   List_iterator_fast<TABLE_LIST> li(subq_lex->top_join_list);
6750   TABLE_LIST *tl;
6751   while ((tl= li++))
6752   {
6753     tl->embedding= sj_nest;
6754     tl->join_list= &nested_join->join_list;
6755     nested_join->join_list.push_back(tl);
6756   }
6757 
6758   /*
6759     Reconnect the next_leaf chain.
6760     TODO: Do we have to put subquery's tables at the end of the chain?
6761           Inserting them at the beginning would be a bit faster.
6762     NOTE: We actually insert them at the front! That's because the order is
6763           reversed in this list.
6764   */
6765   for (tl= parent_lex->leaf_tables; tl->next_leaf; tl= tl->next_leaf)
6766   {}
6767   tl->next_leaf= subq_lex->leaf_tables;
6768 
6769   /*
6770     Same as above for next_local chain. This needed only for re-execution.
6771     (The next_local chain always starts with SELECT_LEX::table_list)
6772   */
6773   for (tl= parent_lex->get_table_list(); tl->next_local; tl= tl->next_local)
6774   {}
6775   tl->next_local= subq_lex->get_table_list();
6776 
6777   /* A theory: no need to re-connect the next_global chain */
6778 
6779   /* 3. Remove the original subquery predicate from the WHERE/ON */
6780 
6781   // The subqueries were replaced for Item_int(1) earlier
6782   /*TODO: also reset the 'with_subselect' there. */
6783 
6784   /* n. Adjust the parent_join->tables counter */
6785   uint table_no= parent_join->tables;
6786   /* n. Walk through child's tables and adjust table->map */
6787   for (tl= subq_lex->leaf_tables; tl; tl= tl->next_leaf, table_no++)
6788   {
6789     tl->table->tablenr= table_no;
6790     tl->table->map= ((table_map)1) << table_no;
6791     SELECT_LEX *old_sl= tl->select_lex;
6792     tl->select_lex= parent_join->select_lex;
6793     for (TABLE_LIST *emb= tl->embedding;
6794          emb && emb->select_lex == old_sl;
6795          emb= emb->embedding)
6796       emb->select_lex= parent_join->select_lex;
6797   }
6798   parent_join->tables+= subq_lex->join->tables;
6799   parent_join->primary_tables+= subq_lex->join->tables;
6800 
6801   parent_lex->between_count+= subq_lex->between_count;
6802   parent_lex->cond_count+= subq_lex->cond_count;
6803   parent_lex->derived_table_count+= subq_lex->derived_table_count;
6804   parent_lex->materialized_table_count+= subq_lex->materialized_table_count;
6805   parent_lex->partitioned_table_count+= subq_lex->partitioned_table_count;
6806 
6807   nested_join->sj_outer_exprs.empty();
6808   nested_join->sj_inner_exprs.empty();
6809 
6810   /*
6811     @todo: Add similar conversion for subqueries other than IN.
6812   */
6813   if (subq_pred->substype() == Item_subselect::IN_SUBS)
6814   {
6815     Item_in_subselect *in_subq_pred= (Item_in_subselect *)subq_pred;
6816 
6817     /* Left side of IN predicate is already resolved */
6818     DBUG_ASSERT(in_subq_pred->left_expr->fixed);
6819 
6820     in_subq_pred->exec_method= Item_exists_subselect::EXEC_SEMI_JOIN;
6821     /*
6822       sj_corr_tables is supposed to contain non-trivially correlated tables,
6823       but here it is set to contain all correlated tables.
6824       @todo: Add analysis step that assigns only the set of non-trivially
6825       correlated tables to sj_corr_tables.
6826     */
6827     nested_join->sj_corr_tables= subq_pred->used_tables();
6828     /*
6829       sj_depends_on contains the set of outer tables referred in the
6830       subquery's WHERE clause as well as tables referred in the IN predicate's
6831       left-hand side.
6832     */
6833     nested_join->sj_depends_on=  subq_pred->used_tables() |
6834                                  in_subq_pred->left_expr->used_tables();
6835     /* Put the subquery's WHERE into semi-join's condition. */
6836     sj_nest->sj_on_expr= subq_lex->where;
6837 
6838     /*
6839     Create the IN-equalities and inject them into semi-join's ON condition.
6840     Additionally, for LooseScan strategy
6841      - Record the number of IN-equalities.
6842      - Create list of pointers to (oe1, ..., ieN). We'll need the list to
6843        see which of the expressions are bound and which are not (for those
6844        we'll produce a distinct stream of (ie_i1,...ie_ik).
6845 
6846        (TODO: can we just create a list of pointers and hope the expressions
6847        will not substitute themselves on fix_fields()? or we need to wrap
6848        them into Item_direct_view_refs and store pointers to those. The
6849        pointers to Item_direct_view_refs are guaranteed to be stable as
6850        Item_direct_view_refs doesn't substitute itself with anything in
6851        Item_direct_view_ref::fix_fields.
6852     */
6853 
6854     if (in_subq_pred->left_expr->type() == Item::SUBSELECT_ITEM)
6855     {
6856       List<Item> ref_list;
6857       uint i;
6858 
6859       Item *header= subq_lex->ref_pointer_array[0];
6860       for (i= 1; i < in_subq_pred->left_expr->cols(); i++)
6861       {
6862         ref_list.push_back(subq_lex->ref_pointer_array[i]);
6863       }
6864 
6865       Item_row *right_expr= new Item_row(header, ref_list);
6866 
6867       nested_join->sj_outer_exprs.push_back(in_subq_pred->left_expr);
6868       nested_join->sj_inner_exprs.push_back(right_expr);
6869       Item_func_eq *item_eq=
6870         new Item_func_eq(in_subq_pred->left_expr,
6871                          right_expr);
6872       if (item_eq == NULL)
6873         DBUG_RETURN(TRUE);
6874 
6875       sj_nest->sj_on_expr= and_items(sj_nest->sj_on_expr, item_eq);
6876       if (sj_nest->sj_on_expr == NULL)
6877         DBUG_RETURN(TRUE);
6878     }
6879     else
6880     {
6881       for (uint i= 0; i < in_subq_pred->left_expr->cols(); i++)
6882       {
6883         nested_join->sj_outer_exprs.push_back(in_subq_pred->left_expr->
6884                                               element_index(i));
6885         nested_join->sj_inner_exprs.push_back(subq_lex->ref_pointer_array[i]);
6886 
6887         Item_func_eq *item_eq=
6888           new Item_func_eq(in_subq_pred->left_expr->element_index(i),
6889                            subq_lex->ref_pointer_array[i]);
6890         if (item_eq == NULL)
6891           DBUG_RETURN(TRUE);
6892 
6893         sj_nest->sj_on_expr= and_items(sj_nest->sj_on_expr, item_eq);
6894         if (sj_nest->sj_on_expr == NULL)
6895           DBUG_RETURN(TRUE);
6896       }
6897     }
6898     /* Fix the created equality and AND */
6899 
6900     Opt_trace_array sj_on_trace(&thd->opt_trace,
6901                                 "evaluating_constant_semijoin_conditions");
6902     sj_nest->sj_on_expr->top_level_item();
6903     if (sj_nest->sj_on_expr->fix_fields(thd, &sj_nest->sj_on_expr))
6904       DBUG_RETURN(true);
6905   }
6906 
6907   /* Unlink the child select_lex: */
6908   subq_lex->master_unit()->exclude_level();
6909   parent_lex->removed_select= subq_lex;
6910   /*
6911     Update the resolver context - needed for Item_field objects that have been
6912     replaced in the item tree for this execution, but are still needed for
6913     subsequent executions.
6914   */
6915   for (st_select_lex *select= parent_lex->removed_select;
6916        select != NULL;
6917        select= select->removed_select)
6918     select->context.select_lex= parent_lex;
6919   /*
6920     Walk through sj nest's WHERE and ON expressions and call
6921     item->fix_table_changes() for all items.
6922   */
6923   sj_nest->sj_on_expr->fix_after_pullout(parent_lex, subq_lex);
6924   fix_list_after_tbl_changes(parent_lex, subq_lex,
6925                              &sj_nest->nested_join->join_list);
6926 
6927   //TODO fix QT_
6928   DBUG_EXECUTE("where",
6929                print_where(sj_nest->sj_on_expr,"SJ-EXPR", QT_ORDINARY););
6930 
6931   if (emb_tbl_nest)
6932   {
6933     /* Inject sj_on_expr into the parent's ON condition */
6934     emb_tbl_nest->set_join_cond(and_items(emb_tbl_nest->join_cond(),
6935                                           sj_nest->sj_on_expr));
6936     if (emb_tbl_nest->join_cond() == NULL)
6937       DBUG_RETURN(true);
6938     emb_tbl_nest->join_cond()->top_level_item();
6939     if (!emb_tbl_nest->join_cond()->fixed &&
6940         emb_tbl_nest->join_cond()->fix_fields(parent_join->thd,
6941                                               emb_tbl_nest->join_cond_ref()))
6942       DBUG_RETURN(true);
6943   }
6944   else
6945   {
6946     /* Inject sj_on_expr into the parent's WHERE condition */
6947     parent_join->conds= and_items(parent_join->conds, sj_nest->sj_on_expr);
6948     if (parent_join->conds == NULL)
6949       DBUG_RETURN(true);
6950     parent_join->conds->top_level_item();
6951     if (parent_join->conds->fix_fields(parent_join->thd, &parent_join->conds))
6952       DBUG_RETURN(true);
6953     parent_join->select_lex->where= parent_join->conds;
6954   }
6955 
6956   if (subq_lex->ftfunc_list->elements)
6957   {
6958     Item_func_match *ifm;
6959     List_iterator_fast<Item_func_match> li(*(subq_lex->ftfunc_list));
6960     while ((ifm= li++))
6961       parent_lex->ftfunc_list->push_front(ifm);
6962   }
6963 
6964   DBUG_RETURN(false);
6965 }
6966 
6967 
6968 /*
6969   Convert semi-join subquery predicates into semi-join join nests
6970 
6971   SYNOPSIS
6972     JOIN::flatten_subqueries()
6973 
6974   DESCRIPTION
6975 
6976     Convert candidate subquery predicates into semi-join join nests. This
6977     transformation is performed once in query lifetime and is irreversible.
6978 
6979     Conversion of one subquery predicate
6980     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6981     We start with a join that has a semi-join subquery:
6982 
6983       SELECT ...
6984       FROM ot, ...
6985       WHERE oe IN (SELECT ie FROM it1 ... itN WHERE subq_where) AND outer_where
6986 
6987     and convert it into a semi-join nest:
6988 
6989       SELECT ...
6990       FROM ot SEMI JOIN (it1 ... itN), ...
6991       WHERE outer_where AND subq_where AND oe=ie
6992 
6993     that is, in order to do the conversion, we need to
6994 
6995      * Create the "SEMI JOIN (it1 .. itN)" part and add it into the parent
6996        query's FROM structure.
6997      * Add "AND subq_where AND oe=ie" into parent query's WHERE (or ON if
6998        the subquery predicate was in an ON expression)
6999      * Remove the subquery predicate from the parent query's WHERE
7000 
7001     Considerations when converting many predicates
7002     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
7003     A join may have at most MAX_TABLES tables. This may prevent us from
7004     flattening all subqueries when the total number of tables in parent and
7005     child selects exceeds MAX_TABLES. In addition, one slot is reserved per
7006     semi-join nest, in case the subquery needs to be materialized in a
7007     temporary table.
7008     We deal with this problem by flattening children's subqueries first and
7009     then using a heuristic rule to determine each subquery predicate's
7010     "priority".
7011 
7012   RETURN
7013     FALSE  OK
7014     TRUE   Error
7015 */
7016 
flatten_subqueries()7017 bool JOIN::flatten_subqueries()
7018 {
7019   Item_exists_subselect **subq;
7020   Item_exists_subselect **subq_end;
7021   bool outer_join_objection= false;
7022   Opt_trace_context * const trace= &thd->opt_trace;
7023   DBUG_ENTER("JOIN::flatten_subqueries");
7024 
7025   if (sj_subselects.empty())
7026     DBUG_RETURN(FALSE);
7027 
7028   /* First, convert child join's subqueries. We proceed bottom-up here */
7029   for (subq= sj_subselects.begin(), subq_end= sj_subselects.end();
7030        subq < subq_end;
7031        subq++)
7032   {
7033     /*
7034       Currently, we only support transformation of IN subqueries.
7035     */
7036     DBUG_ASSERT((*subq)->substype() == Item_subselect::IN_SUBS);
7037 
7038     st_select_lex *child_select= (*subq)->unit->first_select();
7039     JOIN *child_join= child_select->join;
7040 
7041     /*
7042       child_select->where contains only the WHERE predicate of the
7043       subquery itself here. We may be selecting from a VIEW, which has its
7044       own predicate. The combined predicates are available in child_join->conds,
7045       which was built by setup_conds() doing prepare_where() for all views.
7046     */
7047     child_select->where= child_join->conds;
7048 
7049     if (child_join->flatten_subqueries())
7050       DBUG_RETURN(TRUE);
7051 
7052     (*subq)->sj_convert_priority=
7053       (((*subq)->unit->uncacheable & UNCACHEABLE_DEPENDENT) ? MAX_TABLES : 0) +
7054       child_join->tables;
7055   }
7056 
7057   //dump_TABLE_LIST_struct(select_lex, select_lex->leaf_tables);
7058   /*
7059     2. Pick which subqueries to convert:
7060       sort the subquery array
7061       - prefer correlated subqueries over uncorrelated;
7062       - prefer subqueries that have greater number of outer tables;
7063   */
7064   my_qsort(sj_subselects.begin(),
7065            sj_subselects.size(), sj_subselects.element_size(),
7066            reinterpret_cast<qsort_cmp>(subq_sj_candidate_cmp));
7067 
7068   Prepared_stmt_arena_holder ps_arena_holder(thd);
7069 
7070   // #tables-in-parent-query + #tables-in-subquery + sj nests <= MAX_TABLES
7071   /* Replace all subqueries to be flattened with Item_int(1) */
7072 
7073   uint table_count= tables;
7074   for (subq= sj_subselects.begin(); subq < subq_end; subq++)
7075   {
7076     // Add the tables in the subquery nest plus one in case of materialization:
7077     const uint tables_added= (*subq)->unit->first_select()->join->tables + 1;
7078     (*subq)->sj_chosen= table_count + tables_added <= MAX_TABLES;
7079 
7080     if (!(*subq)->sj_chosen)
7081       continue;
7082 
7083     table_count+= tables_added;
7084 
7085     Item **tree= ((*subq)->embedding_join_nest == NULL) ?
7086                    &conds : ((*subq)->embedding_join_nest->join_cond_ref());
7087     if (replace_subcondition(this, tree, *subq, new Item_int(1), FALSE))
7088       DBUG_RETURN(TRUE); /* purecov: inspected */
7089   }
7090 
7091   for (subq= sj_subselects.begin(); subq < subq_end; subq++)
7092   {
7093     if (!(*subq)->sj_chosen)
7094       continue;
7095 
7096     OPT_TRACE_TRANSFORM(trace, oto0, oto1,
7097                         (*subq)->unit->first_select()->select_number,
7098                         "IN (SELECT)", "semijoin");
7099     oto1.add("chosen", true);
7100     if (convert_subquery_to_semijoin(this, *subq))
7101       DBUG_RETURN(TRUE);
7102   }
7103   /*
7104     3. Finalize the subqueries that we did not convert,
7105        ie. perform IN->EXISTS rewrite.
7106   */
7107   for (subq= sj_subselects.begin(); subq < subq_end; subq++)
7108   {
7109     if ((*subq)->sj_chosen)
7110       continue;
7111     {
7112       OPT_TRACE_TRANSFORM(trace, oto0, oto1,
7113                           (*subq)->unit->first_select()->select_number,
7114                           "IN (SELECT)", "semijoin");
7115       if (outer_join_objection)
7116         oto1.add_alnum("cause", "outer_join");
7117       oto1.add("chosen", false);
7118     }
7119     JOIN *child_join= (*subq)->unit->first_select()->join;
7120     Item_subselect::trans_res res;
7121     (*subq)->changed= 0;
7122     (*subq)->fixed= 0;
7123 
7124     SELECT_LEX *save_select_lex= thd->lex->current_select;
7125     thd->lex->current_select= (*subq)->unit->first_select();
7126 
7127     res= (*subq)->select_transformer(child_join);
7128 
7129     thd->lex->current_select= save_select_lex;
7130 
7131     if (res == Item_subselect::RES_ERROR)
7132       DBUG_RETURN(TRUE);
7133 
7134     (*subq)->changed= 1;
7135     (*subq)->fixed= 1;
7136 
7137     Item *substitute= (*subq)->substitution;
7138     const bool do_fix_fields= !(*subq)->substitution->fixed;
7139     const bool subquery_in_join_clause= (*subq)->embedding_join_nest != NULL;
7140 
7141     Item **tree= subquery_in_join_clause ?
7142       ((*subq)->embedding_join_nest->join_cond_ref()) : &conds;
7143     if (replace_subcondition(this, tree, *subq, substitute, do_fix_fields))
7144       DBUG_RETURN(TRUE);
7145     (*subq)->substitution= NULL;
7146 
7147     if (!thd->stmt_arena->is_conventional())
7148     {
7149       if (subquery_in_join_clause)
7150       {
7151         tree= &((*subq)->embedding_join_nest->prep_join_cond);
7152         /*
7153           Some precaution is needed when dealing with PS/SP:
7154           fix_prepare_info_in_table_list() sets prep_join_cond, but only for
7155           tables, not for join nest objects. This is instead populated in
7156           record_join_nest_info(), which is called after this function.
7157           The case where *tree is NULL is handled by this procedure.
7158         */
7159       }
7160       else
7161         tree= &select_lex->prep_where;
7162 
7163       if (*tree && replace_subcondition(this, tree, *subq, substitute, false))
7164         DBUG_RETURN(true);
7165     }
7166   }
7167 
7168   sj_subselects.clear();
7169   DBUG_RETURN(FALSE);
7170 }
7171 
7172 
7173 /*
7174   Remove the predicates pushed down into the subquery
7175 
7176   SYNOPSIS
7177     JOIN::remove_subq_pushed_predicates()
7178       where   IN  Must be NULL
7179               OUT The remaining WHERE condition, or NULL
7180 
7181   DESCRIPTION
7182     Given that this join will be executed using (unique|index)_subquery,
7183     without "checking NULL", remove the predicates that were pushed down
7184     into the subquery.
7185 
7186     If the subquery compares scalar values, we can remove the condition that
7187     was wrapped into trig_cond (it will be checked when needed by the subquery
7188     engine)
7189 
7190     If the subquery compares row values, we need to keep the wrapped
7191     equalities in the WHERE clause: when the left (outer) tuple has both NULL
7192     and non-NULL values, we'll do a full table scan and will rely on the
7193     equalities corresponding to non-NULL parts of left tuple to filter out
7194     non-matching records.
7195 
7196     If '*where' is a triggered condition, or contains 'OR x IS NULL', or
7197     contains a condition coming from the original subquery's WHERE clause, or
7198     if there are more than one outer expressions, then WHERE is not of the
7199     simple form:
7200       outer_expr = inner_expr
7201     and thus this function does nothing.
7202 
7203     If the index is on prefix (=> test_if_ref() is false), then the equality
7204     is needed as post-filter, so this function does nothing.
7205 
7206     TODO: We can remove the equalities that will be guaranteed to be true by the
7207     fact that subquery engine will be using index lookup. This must be done only
7208     for cases where there are no conversion errors of significance, e.g. 257
7209     that is searched in a byte. But this requires homogenization of the return
7210     codes of all Field*::store() methods.
7211 */
remove_subq_pushed_predicates(Item ** where)7212 void JOIN::remove_subq_pushed_predicates(Item **where)
7213 {
7214   if (conds->type() == Item::FUNC_ITEM &&
7215       ((Item_func *)this->conds)->functype() == Item_func::EQ_FUNC &&
7216       ((Item_func *)conds)->arguments()[0]->type() == Item::REF_ITEM &&
7217       ((Item_func *)conds)->arguments()[1]->type() == Item::FIELD_ITEM &&
7218       test_if_ref (this->conds,
7219                    (Item_field *)((Item_func *)conds)->arguments()[1],
7220                    ((Item_func *)conds)->arguments()[0]))
7221   {
7222     *where= 0;
7223     return;
7224   }
7225 }
7226 
7227 
7228 /**
7229   @brief
7230   Add keys to derived tables'/views' result tables in a list
7231 
7232   @param select_lex generate derived keys for select_lex's derived tables
7233 
7234   @details
7235   This function generates keys for all derived tables/views of the
7236   select_lex to which this join corresponds, with the help of the
7237   TABLE_LIST::generate_keys function.
7238 
7239   @return FALSE all keys were successfully added.
7240   @return TRUE OOM error
7241 */
7242 
generate_derived_keys()7243 bool JOIN::generate_derived_keys()
7244 {
7245   DBUG_ASSERT(select_lex->materialized_table_count);
7246 
7247   for (TABLE_LIST *table= select_lex->leaf_tables;
7248        table;
7249        table= table->next_leaf)
7250   {
7251     table->derived_keys_ready= TRUE;
7252     /* Process tables that aren't materialized yet. */
7253     if (table->uses_materialization() && !table->table->is_created() &&
7254         table->generate_keys())
7255       return TRUE;
7256   }
7257   return FALSE;
7258 }
7259 
7260 
7261 /**
7262   @brief
7263   Drop unused keys for each materialized derived table/view
7264 
7265   @details
7266   For each materialized derived table/view, call TABLE::use_index to save one
7267   index chosen by the optimizer and ignore others. If no key is chosen, then all
7268   keys will be ignored.
7269 */
7270 
drop_unused_derived_keys()7271 void JOIN::drop_unused_derived_keys()
7272 {
7273   DBUG_ASSERT(select_lex->materialized_table_count);
7274 
7275   for (uint i= 0 ; i < tables ; i++)
7276   {
7277     JOIN_TAB *tab= join_tab + i;
7278     TABLE *table= tab->table;
7279     /*
7280      Save chosen key description if:
7281      1) it's a materialized derived table
7282      2) it's not yet instantiated
7283      3) some keys are defined for it
7284     */
7285     if (table &&
7286         table->pos_in_table_list->uses_materialization() &&     // (1)
7287         !table->is_created() &&                                 // (2)
7288         table->max_keys > 0)                                    // (3)
7289     {
7290       Key_use *keyuse= tab->position->key;
7291 
7292       table->use_index(keyuse ? keyuse->key : -1);
7293 
7294       const bool key_is_const= keyuse && tab->const_keys.is_set(keyuse->key);
7295       tab->const_keys.clear_all();
7296       tab->keys.clear_all();
7297 
7298       if (!keyuse)
7299         continue;
7300 
7301       /*
7302         Update the selected "keyuse" to point to key number 0.
7303         Notice that unused keyuse entries still point to the deleted
7304         candidate keys. tab->keys (and tab->const_keys if the chosen key
7305         is constant) should reference key object no. 0 as well.
7306       */
7307       tab->keys.set_bit(0);
7308       if (key_is_const)
7309         tab->const_keys.set_bit(0);
7310 
7311       const uint oldkey= keyuse->key;
7312       for (; keyuse->table == table && keyuse->key == oldkey; keyuse++)
7313         keyuse->key= 0;
7314     }
7315   }
7316 }
7317 
7318 
7319 /**
7320   Cache constant expressions in WHERE, HAVING, ON conditions.
7321 
7322   @return False if success, True if error
7323 
7324   @note This function is run after conditions have been pushed down to
7325         individual tables, so transformation is applied to JOIN_TAB::condition
7326         and not to the WHERE condition.
7327 */
7328 
cache_const_exprs()7329 bool JOIN::cache_const_exprs()
7330 {
7331   /* No need in cache if all tables are constant. */
7332   DBUG_ASSERT(!plan_is_const());
7333 
7334   for (uint i= const_tables; i < tables; i++)
7335   {
7336     Item *condition= join_tab[i].condition();
7337     if (condition == NULL)
7338       continue;
7339     Item *cache_item= NULL;
7340     Item **analyzer_arg= &cache_item;
7341     condition=
7342       condition->compile(&Item::cache_const_expr_analyzer,
7343                          (uchar **)&analyzer_arg,
7344                          &Item::cache_const_expr_transformer,
7345                          (uchar *)&cache_item);
7346     if (condition == NULL)
7347       return true;
7348     if (condition != join_tab[i].condition())
7349       join_tab[i].set_condition(condition, __LINE__);
7350   }
7351   if (having)
7352   {
7353     Item *cache_item= NULL;
7354     Item **analyzer_arg= &cache_item;
7355     having=
7356       having->compile(&Item::cache_const_expr_analyzer, (uchar **)&analyzer_arg,
7357                       &Item::cache_const_expr_transformer,(uchar *)&cache_item);
7358     if (having == NULL)
7359       return true;
7360   }
7361   return false;
7362 }
7363 
7364 
replace_item_field(const char * field_name,Item * new_item)7365 void JOIN::replace_item_field(const char* field_name, Item* new_item)
7366 {
7367   if (conds)
7368   {
7369     conds= conds->compile(&Item::item_field_by_name_analyzer,
7370                           (uchar **)&field_name,
7371                           &Item::item_field_by_name_transformer,
7372                           (uchar *)new_item);
7373     conds->update_used_tables();
7374   }
7375 
7376   List_iterator<Item> it(fields_list);
7377   Item *item;
7378   while ((item= it++))
7379   {
7380     item= item->compile(&Item::item_field_by_name_analyzer,
7381                         (uchar **)&field_name,
7382                         &Item::item_field_by_name_transformer,
7383                         (uchar *)new_item);
7384     it.replace(item);
7385     item->update_used_tables();
7386   }
7387 }
7388 
7389 
7390 /**
7391   Extract a condition that can be checked after reading given table
7392 
7393   @param cond       Condition to analyze
7394   @param tables     Tables for which "current field values" are available
7395   @param used_table Table(s) that we are extracting the condition for (may
7396                     also include PSEUDO_TABLE_BITS, and may be zero)
7397   @param exclude_expensive_cond  Do not push expensive conditions
7398 
7399   @retval <>NULL Generated condition
7400   @retval = NULL Already checked, OR error
7401 
7402   @details
7403     Extract the condition that can be checked after reading the table(s)
7404     specified in @c used_table, given that current-field values for tables
7405     specified in @c tables bitmap are available.
7406     If @c used_table is 0, extract conditions for all tables in @c tables.
7407 
7408     This function can be used to extract conditions relevant for a table
7409     in a join order. Together with its caller, it will ensure that all
7410     conditions are attached to the first table in the join order where all
7411     necessary fields are available, and it will also ensure that a given
7412     condition is attached to only one table.
7413     To accomplish this, first initialize @c tables to the empty
7414     set. Then, loop over all tables in the join order, set @c used_table to
7415     the bit representing the current table, accumulate @c used_table into the
7416     @c tables set, and call this function. To ensure correct handling of
7417     const expressions and outer references, add the const table map and
7418     OUTER_REF_TABLE_BIT to @c used_table for the first table. To ensure
7419     that random expressions are evaluated for the final table, add
7420     RAND_TABLE_BIT to @c used_table for the final table.
7421 
7422     The function assumes that constant, inexpensive parts of the condition
7423     have already been checked. Constant, expensive parts will be attached
7424     to the first table in the join order, provided that the above call
7425     sequence is followed.
7426 
7427     The call order will ensure that conditions covering tables in @c tables
7428     minus those in @c used_table, have already been checked.
7429 
7430     The function takes into account that some parts of the condition are
7431     guaranteed to be true by employed 'ref' access methods (the code that
7432     does this is located at the end, search down for "EQ_FUNC").
7433 
7434   @note
7435     make_cond_for_info_schema() uses an algorithm similar to
7436     make_cond_for_table().
7437 */
7438 
7439 Item *
make_cond_for_table(Item * cond,table_map tables,table_map used_table,bool exclude_expensive_cond)7440 make_cond_for_table(Item *cond, table_map tables, table_map used_table,
7441                     bool exclude_expensive_cond)
7442 {
7443   return make_cond_for_table_from_pred(cond, cond, tables, used_table,
7444                                        exclude_expensive_cond);
7445 }
7446 
7447 static Item *
make_cond_for_table_from_pred(Item * root_cond,Item * cond,table_map tables,table_map used_table,bool exclude_expensive_cond)7448 make_cond_for_table_from_pred(Item *root_cond, Item *cond,
7449                               table_map tables, table_map used_table,
7450                               bool exclude_expensive_cond)
7451 {
7452   /*
7453     Ignore this condition if
7454      1. We are extracting conditions for a specific table, and
7455      2. that table is not referenced by the condition, but not if
7456      3. this is a constant condition not checked at optimization time and
7457         this is the first table we are extracting conditions for.
7458        (Assuming that used_table == tables for the first table.)
7459   */
7460   if (used_table &&                                                 // 1
7461       !(cond->used_tables() & used_table) &&                        // 2
7462       !(cond->is_expensive() && used_table == tables))              // 3
7463     return NULL;
7464 
7465   if (cond->type() == Item::COND_ITEM)
7466   {
7467     if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
7468     {
7469       /* Create new top level AND item */
7470       Item_cond_and *new_cond= new Item_cond_and;
7471       if (!new_cond)
7472         return NULL;
7473       List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
7474       Item *item;
7475       while ((item= li++))
7476       {
7477         Item *fix= make_cond_for_table_from_pred(root_cond, item,
7478                                                  tables, used_table,
7479                                                  exclude_expensive_cond);
7480         if (fix)
7481           new_cond->argument_list()->push_back(fix);
7482       }
7483       switch (new_cond->argument_list()->elements) {
7484       case 0:
7485         return NULL;                          // Always true
7486       case 1:
7487         return new_cond->argument_list()->head();
7488       default:
7489         if (new_cond->fix_fields(current_thd, NULL))
7490           return NULL;
7491         return new_cond;
7492       }
7493     }
7494     else
7495     {                                         // Or list
7496       Item_cond_or *new_cond= new Item_cond_or;
7497       if (!new_cond)
7498         return NULL;
7499       List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
7500       Item *item;
7501       while ((item= li++))
7502       {
7503         Item *fix= make_cond_for_table_from_pred(root_cond, item,
7504                                                  tables, 0L,
7505                                                  exclude_expensive_cond);
7506 	if (!fix)
7507           return NULL;                        // Always true
7508 	new_cond->argument_list()->push_back(fix);
7509       }
7510       if (new_cond->fix_fields(current_thd, NULL))
7511         return NULL;
7512       return new_cond;
7513     }
7514   }
7515 
7516   /*
7517     Omit this condition if
7518      1. It has been marked as omittable before, or
7519      2. Some tables referred by the condition are not available, or
7520      3. We are extracting conditions for all tables, the condition is
7521         considered 'expensive', and we want to delay evaluation of such
7522         conditions to the execution phase.
7523   */
7524   if (cond->marker == 3 ||                                             // 1
7525       (cond->used_tables() & ~tables) ||                               // 2
7526       (!used_table && exclude_expensive_cond && cond->is_expensive())) // 3
7527     return NULL;
7528 
7529   /*
7530     Extract this condition if
7531      1. It has already been marked as applicable, or
7532      2. It is not a <comparison predicate> (=, <, >, <=, >=, <=>)
7533   */
7534   if (cond->marker == 2 ||                                             // 1
7535       cond->eq_cmp_result() == Item::COND_OK)                          // 2
7536     return cond;
7537 
7538   /*
7539     Remove equalities that are guaranteed to be true by use of 'ref' access
7540     method.
7541     Note that ref access implements "table1.field1 <=> table2.indexed_field2",
7542     i.e. if it passed a NULL field1, it will return NULL indexed_field2 if
7543     there are.
7544     Thus the equality "table1.field1 = table2.indexed_field2",
7545     is equivalent to "ref access AND table1.field1 IS NOT NULL"
7546     i.e. "ref access and proper setting/testing of ref->null_rejecting".
7547     Thus, we must be careful, that when we remove equalities below we also
7548     set ref->null_rejecting, and test it at execution; otherwise wrong NULL
7549     matches appear.
7550     So:
7551     - for the optimization phase, the code which is below, and the code in
7552     test_if_ref(), and in add_key_field(), must be kept in sync: if the
7553     applicability conditions in one place are relaxed, they should also be
7554     relaxed elsewhere.
7555     - for the execution phase, all possible execution methods must test
7556     ref->null_rejecting.
7557   */
7558   if (cond->type() == Item::FUNC_ITEM &&
7559       ((Item_func*) cond)->functype() == Item_func::EQ_FUNC)
7560   {
7561     Item *left_item= ((Item_func*) cond)->arguments()[0]->real_item();
7562     Item *right_item= ((Item_func*) cond)->arguments()[1]->real_item();
7563     if ((left_item->type() == Item::FIELD_ITEM &&
7564          test_if_ref(root_cond, (Item_field*) left_item, right_item)) ||
7565         (right_item->type() == Item::FIELD_ITEM &&
7566          test_if_ref(root_cond, (Item_field*) right_item, left_item)))
7567     {
7568       cond->marker= 3;                   // Condition can be omitted
7569       return NULL;
7570     }
7571   }
7572   cond->marker= 2;                      // Mark condition as applicable
7573   return cond;
7574 }
7575 
7576 
7577 /**
7578   Separates the predicates in a join condition and pushes them to the
7579   join step where all involved tables are available in the join prefix.
7580   ON clauses from JOIN expressions are also pushed to the most appropriate step.
7581 
7582   @param join Join object where predicates are pushed.
7583 
7584   @param cond Pointer to condition which may contain an arbitrary number of
7585               predicates, combined using AND, OR and XOR items.
7586               If NULL, equivalent to a predicate that returns TRUE for all
7587               row combinations.
7588 
7589 
7590   @retval true  Found impossible WHERE clause, or out-of-memory
7591   @retval false Other
7592 */
7593 
make_join_select(JOIN * join,Item * cond)7594 static bool make_join_select(JOIN *join, Item *cond)
7595 {
7596   THD *thd= join->thd;
7597   Opt_trace_context * const trace= &thd->opt_trace;
7598   DBUG_ENTER("make_join_select");
7599   {
7600     add_not_null_conds(join);
7601     /*
7602       Step #1: Extract constant condition
7603        - Extract and check the constant part of the WHERE
7604        - Extract constant parts of ON expressions from outer
7605          joins and attach them appropriately.
7606     */
7607     if (cond)                /* Because of QUICK_GROUP_MIN_MAX_SELECT */
7608     {                        /* there may be a select without a cond. */
7609       if (join->primary_tables > 1)
7610         cond->update_used_tables();		// Tablenr may have changed
7611       if (join->plan_is_const() &&
7612 	  thd->lex->current_select->master_unit() ==
7613 	  &thd->lex->unit)		// not upper level SELECT
7614         join->const_table_map|=RAND_TABLE_BIT;
7615 
7616       /*
7617         Extract expressions that depend on constant tables
7618         1. Const part of the join's WHERE clause can be checked immediately
7619            and if it is not satisfied then the join has empty result
7620         2. Constant parts of outer joins' ON expressions must be attached
7621            there inside the triggers.
7622       */
7623       {
7624         Item *const_cond=
7625 	  make_cond_for_table(cond,
7626                               join->const_table_map,
7627                               (table_map) 0, 1);
7628         /* Add conditions added by add_not_null_conds(). */
7629         for (uint i= 0 ; i < join->const_tables ; i++)
7630         {
7631           if (and_conditions(&const_cond, join->join_tab[i].condition()))
7632             DBUG_RETURN(true);
7633         }
7634 
7635         DBUG_EXECUTE("where",print_where(const_cond,"constants", QT_ORDINARY););
7636         for (JOIN_TAB *tab= join->join_tab+join->const_tables;
7637              tab < join->join_tab+join->tables ; tab++)
7638         {
7639           if (tab->on_expr_ref && *tab->on_expr_ref)
7640           {
7641             JOIN_TAB *cond_tab= tab->first_inner;
7642             Item *tmp= make_cond_for_table(*tab->on_expr_ref,
7643                                            join->const_table_map,
7644                                            (  table_map) 0, 0);
7645             if (!tmp)
7646               continue;
7647             tmp= new
7648               Item_func_trig_cond(tmp, &cond_tab->not_null_compl, cond_tab,
7649                                   Item_func_trig_cond::IS_NOT_NULL_COMPL);
7650             if (!tmp)
7651               DBUG_RETURN(true);
7652 
7653             tmp->quick_fix_field();
7654             if (cond_tab->and_with_condition(tmp, __LINE__))
7655               DBUG_RETURN(true);
7656           }
7657         }
7658         if (const_cond != NULL)
7659         {
7660           const bool const_cond_is_true= const_cond->val_int() != 0;
7661           Opt_trace_object trace_const_cond(trace);
7662           trace_const_cond.add("condition_on_constant_tables", const_cond)
7663             .add("condition_value", const_cond_is_true);
7664           if (!const_cond_is_true)
7665           {
7666             DBUG_PRINT("info",("Found impossible WHERE condition"));
7667             DBUG_RETURN(1);	 // Impossible const condition
7668           }
7669         }
7670       }
7671     }
7672 
7673     /*
7674       Step #2: Extract WHERE/ON parts
7675     */
7676     Opt_trace_object trace_wrapper(trace);
7677     Opt_trace_object
7678       trace_conditions(trace, "attaching_conditions_to_tables");
7679     trace_conditions.add("original_condition", cond);
7680     Opt_trace_array
7681       trace_attached_comp(trace, "attached_conditions_computation");
7682 
7683     for (uint i=join->const_tables ; i < join->tables ; i++)
7684     {
7685       JOIN_TAB *const tab= join->join_tab + i;
7686 
7687       if (!tab->position)
7688         continue;
7689       /*
7690         first_inner is the X in queries like:
7691         SELECT * FROM t1 LEFT OUTER JOIN (t2 JOIN t3) ON X
7692       */
7693       JOIN_TAB *const first_inner_tab= tab->first_inner;
7694       const table_map used_tables= tab->prefix_tables();
7695       const table_map current_map= tab->added_tables();
7696       bool use_quick_range=0;
7697       Item *tmp;
7698 
7699       /// See if you need to switch to range access
7700       if (tab->type == JT_REF && can_switch_from_ref_to_range(thd, tab))
7701       {
7702         Opt_trace_object wrapper(trace);
7703         Opt_trace_object (trace, "access_type_changed").
7704           add_utf8_table(tab->table).
7705           add_utf8("index", tab->table->key_info[tab->ref.key].name).
7706           add_alnum("old_type", "ref").
7707           add_alnum("new_type", "range").
7708           add_alnum("cause", "uses_more_keyparts");
7709 
7710 	tab->type=JT_ALL;
7711 	use_quick_range=1;
7712 	tab->use_quick=QS_RANGE;
7713         tab->ref.key= -1;
7714 	tab->ref.key_parts=0;		// Don't use ref key.
7715 	tab->position->records_read= rows2double(tab->quick->records);
7716         /*
7717           We will use join cache here : prevent sorting of the first
7718           table only and sort at the end.
7719         */
7720         if (i != join->const_tables &&
7721             join->primary_tables > join->const_tables + 1)
7722           join->full_join= true;
7723       }
7724 
7725       tmp= NULL;
7726       if (cond)
7727         tmp= make_cond_for_table(cond,used_tables,current_map, 0);
7728       /* Add conditions added by add_not_null_conds(). */
7729       if (tab->condition() && and_conditions(&tmp, tab->condition()))
7730         DBUG_RETURN(true);
7731 
7732 
7733       if (cond && !tmp && tab->quick)
7734       {						// Outer join
7735         if (tab->type != JT_ALL)
7736         {
7737           /*
7738             Don't use the quick method
7739             We come here in the case where we have 'key=constant' and
7740             the test is removed by make_cond_for_table()
7741           */
7742           delete tab->quick;
7743           tab->quick= 0;
7744         }
7745         else
7746         {
7747           /*
7748             Hack to handle the case where we only refer to a table
7749             in the ON part of an OUTER JOIN. In this case we want the code
7750             below to check if we should use 'quick' instead.
7751           */
7752           DBUG_PRINT("info", ("Item_int"));
7753           tmp= new Item_int((longlong) 1,1);	// Always true
7754         }
7755 
7756       }
7757       if (tmp || !cond || tab->type == JT_REF || tab->type == JT_REF_OR_NULL ||
7758           tab->type == JT_EQ_REF || first_inner_tab)
7759       {
7760         DBUG_EXECUTE("where",print_where(tmp,tab->table->alias, QT_ORDINARY););
7761 	SQL_SELECT *sel= tab->select= new (thd->mem_root) SQL_SELECT;
7762 	if (!sel)
7763 	  DBUG_RETURN(1);			// End of memory
7764         sel->read_tables= sel->const_tables= join->const_table_map;
7765         /*
7766           If tab is an inner table of an outer join operation,
7767           add a match guard to the pushed down predicate.
7768           The guard will turn the predicate on only after
7769           the first match for outer tables is encountered.
7770 	*/
7771         if (cond && tmp)
7772         {
7773           /*
7774             Because of QUICK_GROUP_MIN_MAX_SELECT there may be a select without
7775             a cond, so neutralize the hack above.
7776           */
7777           if (!(tmp= add_found_match_trig_cond(first_inner_tab, tmp, 0)))
7778             DBUG_RETURN(true);
7779           sel->cond= tmp;
7780           tab->set_condition(tmp, __LINE__);
7781           /* Push condition to storage engine if this is enabled
7782              and the condition is not guarded */
7783 	  if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN) &&
7784               !first_inner_tab)
7785           {
7786             Item *push_cond=
7787               make_cond_for_table(tmp, tab->table->map, tab->table->map, 0);
7788             if (push_cond)
7789             {
7790               /* Push condition to handler */
7791               if (!tab->table->file->cond_push(push_cond))
7792                 tab->table->file->pushed_cond= push_cond;
7793             }
7794           }
7795         }
7796         else
7797         {
7798           sel->cond= NULL;
7799           tab->set_condition(NULL, __LINE__);
7800         }
7801 
7802 	sel->head=tab->table;
7803         DBUG_EXECUTE("where",print_where(tmp,tab->table->alias, QT_ORDINARY););
7804 	if (tab->quick)
7805 	{
7806 	  /* Use quick key read if it's a constant and it's not used
7807 	     with key reading */
7808           if (tab->needed_reg.is_clear_all() && tab->type != JT_EQ_REF &&
7809               tab->type != JT_FT &&
7810               ((tab->type != JT_CONST && tab->type != JT_REF) ||
7811                (uint)tab->ref.key == tab->quick->index))
7812           {
7813             DBUG_ASSERT(tab->quick->is_valid());
7814 	    sel->quick=tab->quick;		// Use value from get_quick_...
7815 	    sel->quick_keys.clear_all();
7816 	    sel->needed_reg.clear_all();
7817 	  }
7818 	  else
7819 	  {
7820 	    delete tab->quick;
7821 	  }
7822 	  tab->quick=0;
7823 	}
7824 	uint ref_key=(uint) sel->head->reginfo.join_tab->ref.key+1;
7825 	if (i == join->const_tables && ref_key)
7826 	{
7827 	  if (!tab->const_keys.is_clear_all() &&
7828               tab->table->reginfo.impossible_range)
7829 	    DBUG_RETURN(1);
7830 	}
7831 	else if (tab->type == JT_ALL && ! use_quick_range)
7832 	{
7833 	  if (!tab->const_keys.is_clear_all() &&
7834 	      tab->table->reginfo.impossible_range)
7835 	    DBUG_RETURN(1);				// Impossible range
7836           /*
7837             We plan to scan (table/index/range scan).
7838             Check again if we should use an index. We can use an index if:
7839 
7840             1a) There is a condition that range optimizer can work on, and
7841             1b) There are non-constant conditions on one or more keys, and
7842             1c) Some of the non-constant fields may have been read
7843                 already. This may be the case if this is not the first
7844                 table in the join OR this is a subselect with
7845                 non-constant conditions referring to an outer table
7846                 (dependent subquery)
7847                 or,
7848             2a) There are conditions only relying on constants
7849             2b) This is the first non-constant table
7850             2c) There is a limit of rows to read that is lower than
7851                 the fanout for this table (i.e., the estimated number
7852                 of rows that will be produced for this table per row
7853                 combination of previous tables)
7854             2d) The query is NOT run with FOUND_ROWS() (because in that
7855                 case we have to scan through all rows to count them anyway)
7856           */
7857           enum { DONT_RECHECK, NOT_FIRST_TABLE, LOW_LIMIT }
7858           recheck_reason= DONT_RECHECK;
7859 
7860           if (cond &&                                                // 1a
7861               (tab->keys != tab->const_keys) &&                      // 1b
7862               (i > 0 ||                                              // 1c
7863                (join->select_lex->master_unit()->item &&
7864                 cond->used_tables() & OUTER_REF_TABLE_BIT)))
7865             recheck_reason= NOT_FIRST_TABLE;
7866           else if (!tab->const_keys.is_clear_all() &&               // 2a
7867                    i == join->const_tables &&                       // 2b
7868                    (join->unit->select_limit_cnt <
7869                     tab->position->records_read) &&                 // 2c
7870                    !(join->select_options & OPTION_FOUND_ROWS))     // 2d
7871             recheck_reason= LOW_LIMIT;
7872 
7873           if (recheck_reason != DONT_RECHECK)
7874           {
7875             Opt_trace_object trace_one_table(trace);
7876             trace_one_table.add_utf8_table(tab->table);
7877             Opt_trace_object trace_table(trace, "rechecking_index_usage");
7878             if (recheck_reason == NOT_FIRST_TABLE)
7879               trace_table.add_alnum("recheck_reason", "not_first_table");
7880             else
7881               trace_table.add_alnum("recheck_reason", "low_limit").
7882                 add("limit", join->unit->select_limit_cnt).
7883                 add("row_estimate", tab->position->records_read);
7884 
7885             /* Join with outer join condition */
7886             Item *orig_cond=sel->cond;
7887             sel->cond= and_conds(sel->cond, *tab->on_expr_ref);
7888 
7889             /*
7890               We can't call sel->cond->fix_fields,
7891               as it will break tab->join_cond() if it's AND condition
7892               (fix_fields currently removes extra AND/OR levels).
7893               Yet attributes of the just built condition are not needed.
7894               Thus we call sel->cond->quick_fix_field for safety.
7895             */
7896             if (sel->cond && !sel->cond->fixed)
7897               sel->cond->quick_fix_field();
7898 
7899             key_map usable_keys= tab->keys;
7900             if (tab->table->force_index)
7901               usable_keys.intersect(tab->table->keys_in_use_for_order_by);
7902 
7903             ORDER::enum_order interesting_order= ORDER::ORDER_NOT_RELEVANT;
7904 
7905             if (recheck_reason == LOW_LIMIT)
7906             {
7907               /*
7908                 When optimizing for ORDER BY ... LIMIT, only indexes
7909                 that give correct ordering are of interest. The block
7910                 below removes all other indexes from usable_keys so
7911                 the range optimizer (see test_quick_select() below)
7912                 does not consider them.
7913               */
7914               for (uint idx= 0; idx < tab->table->s->keys; idx++)
7915               {
7916                 /*
7917                   No need to check if indexes that we're not allowed
7918                   to use can provide required ordering.
7919                 */
7920                 if (!usable_keys.is_set(idx))
7921                   continue;
7922 
7923                 const int read_direction=
7924                   test_if_order_by_key(join->order, tab->table, idx);
7925                 if (read_direction == 0)
7926                 {
7927                   // The index cannot provide required ordering
7928                   usable_keys.clear_bit(idx);
7929                   continue;
7930                 }
7931 
7932                 /*
7933                   Currently, only ASC ordered indexes are availabe,
7934                   which means that if ordering can be achieved by
7935                   reading the index in forward direction, then we have
7936                   ORDER BY... ASC. Likewise, if ordering can be
7937                   achieved by reading the index in backward direction,
7938                   then we have ORDER BY ... DESC.
7939 
7940                   Furthermore, if correct order can be achieved by
7941                   reading one index in either forward or backward
7942                   direction, then all other applicable indexes will
7943                   need to be read in the same direction (so no reason
7944                   to check that read_direction is the same for all
7945                   applicable indexes).
7946 
7947                   If DESC/mixed ordered indexes will be possible in
7948                   the future, the implied connection between index
7949                   read direction and ASC/DESC ordering will no longer
7950                   hold.
7951                 */
7952                 interesting_order= (read_direction == -1 ? ORDER::ORDER_DESC :
7953                                                            ORDER::ORDER_ASC);
7954               }
7955 
7956               if (usable_keys.is_clear_all())
7957                 recheck_reason= DONT_RECHECK; // No usable keys
7958 
7959               /*
7960                 If the current plan is to use a range access on an
7961                 index that provides the order dictated by the ORDER BY
7962                 clause there is no need to recheck index usage; we
7963                 already know from the former call to
7964                 test_quick_select() that a range scan on the chosen
7965                 index is cheapest. Note that previous calls to
7966                 test_quick_select() did not take order direction
7967                 (ASC/DESC) into account, so in case of DESC ordering
7968                 we still need to recheck.
7969               */
7970               if (sel->quick && (sel->quick->index != MAX_KEY) &&
7971                   usable_keys.is_set(sel->quick->index) &&
7972                   (interesting_order != ORDER::ORDER_DESC ||
7973                    sel->quick->reverse_sorted()))
7974               {
7975                 recheck_reason= DONT_RECHECK;
7976               }
7977             }
7978 
7979             if ((recheck_reason != DONT_RECHECK) &&
7980                 sel->test_quick_select(thd, usable_keys,
7981                                        used_tables & ~tab->table->map,
7982                                        (join->select_options &
7983                                         OPTION_FOUND_ROWS ?
7984                                         HA_POS_ERROR :
7985                                         join->unit->select_limit_cnt),
7986                                        false,   // don't force quick range
7987                                        interesting_order) < 0)
7988             {
7989 	      /*
7990 		Before reporting "Impossible WHERE" for the whole query
7991 		we have to check isn't it only "impossible ON" instead
7992 	      */
7993               sel->cond=orig_cond;
7994               if (!*tab->on_expr_ref)
7995                 DBUG_RETURN(1);                 // Impossible WHERE
7996               Opt_trace_object trace_without_on(trace, "without_ON_clause");
7997               if (sel->test_quick_select(thd, tab->keys,
7998                                          used_tables & ~tab->table->map,
7999                                          (join->select_options &
8000                                           OPTION_FOUND_ROWS ?
8001                                           HA_POS_ERROR :
8002                                           join->unit->select_limit_cnt),
8003                                          false,   //don't force quick range
8004                                          ORDER::ORDER_NOT_RELEVANT) < 0)
8005 		DBUG_RETURN(1);			// Impossible WHERE
8006             }
8007             else
8008 	      sel->cond=orig_cond;
8009 
8010 	    /* Fix for EXPLAIN */
8011 	    if (sel->quick)
8012 	      tab->position->records_read= (double)sel->quick->records;
8013 	  }
8014 	  else
8015 	  {
8016 	    sel->needed_reg=tab->needed_reg;
8017 	    sel->quick_keys.clear_all();
8018 	  }
8019 	  if (!sel->quick_keys.is_subset(tab->checked_keys) ||
8020               !sel->needed_reg.is_subset(tab->checked_keys))
8021 	  {
8022 	    tab->keys=sel->quick_keys;
8023             tab->keys.merge(sel->needed_reg);
8024 	    tab->use_quick= (!sel->needed_reg.is_clear_all() &&
8025 			     (sel->quick_keys.is_clear_all() ||
8026 			      (sel->quick &&
8027 			       (sel->quick->records >= 100L)))) ?
8028 	      QS_DYNAMIC_RANGE : QS_RANGE;
8029 	    sel->read_tables= used_tables & ~current_map;
8030 	  }
8031 	  if (i != join->const_tables && tab->use_quick != QS_DYNAMIC_RANGE &&
8032               !tab->first_inner)
8033 	  {					/* Read with cache */
8034 	    if (cond &&
8035                 (tmp=make_cond_for_table(cond,
8036 					 join->const_table_map |
8037 					 current_map,
8038 					 current_map, 0)))
8039 	    {
8040               DBUG_EXECUTE("where",print_where(tmp,"cache", QT_ORDINARY););
8041 	      tab->cache_select=(SQL_SELECT*)
8042 		thd->memdup((uchar*) sel, sizeof(SQL_SELECT));
8043 	      tab->cache_select->cond=tmp;
8044 	      tab->cache_select->read_tables=join->const_table_map;
8045 	    }
8046 	  }
8047 	}
8048       }
8049 
8050       if (pushdown_on_conditions(join, tab))
8051         DBUG_RETURN(1);
8052     }
8053     trace_attached_comp.end();
8054 
8055     /*
8056       In outer joins the loop above, in iteration for table #i, may push
8057       conditions to a table before #i. Thus, the processing below has to be in
8058       a separate loop:
8059     */
8060     Opt_trace_array trace_attached_summary(trace,
8061                                            "attached_conditions_summary");
8062     for (uint i= join->const_tables ; i < join->tables ; i++)
8063     {
8064       JOIN_TAB * const tab= &join->join_tab[i];
8065       if (!tab->table)
8066         continue;
8067       Item * const cond= tab->condition();
8068       Opt_trace_object trace_one_table(trace);
8069       trace_one_table.add_utf8_table(tab->table).
8070         add("attached", cond);
8071       if (cond &&
8072           cond->has_subquery() /* traverse only if needed */ )
8073       {
8074         /*
8075           Why we pass walk_subquery=false: imagine
8076           WHERE t1.col IN (SELECT * FROM t2
8077                              WHERE t2.col IN (SELECT * FROM t3)
8078           and tab==t1. The grandchild subquery (SELECT * FROM t3) should not
8079           be marked as "in condition of t1" but as "in condition of t2", for
8080           correct calculation of the number of its executions.
8081         */
8082         int idx= tab - join->join_tab;
8083         cond->walk(&Item::inform_item_in_cond_of_tab, false,
8084                    reinterpret_cast<uchar * const>(&idx));
8085       }
8086 
8087     }
8088   }
8089   DBUG_RETURN(0);
8090 }
8091 
8092 
8093 /**
8094   Remove the following expressions from ORDER BY and GROUP BY:
8095   Constant expressions @n
8096   Expression that only uses tables that are of type EQ_REF and the reference
8097   is in the ORDER list or if all refereed tables are of the above type.
8098 
8099   In the following, the X field can be removed:
8100   @code
8101   SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t1.a,t2.X
8102   SELECT * FROM t1,t2,t3 WHERE t1.a=t2.a AND t2.b=t3.b ORDER BY t1.a,t3.X
8103   @endcode
8104 
8105   These can't be optimized:
8106   @code
8107   SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t2.X,t1.a
8108   SELECT * FROM t1,t2 WHERE t1.a=t2.a AND t1.b=t2.b ORDER BY t1.a,t2.c
8109   SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t2.b,t1.a
8110   @endcode
8111 
8112   @param  JOIN         join object
8113   @param  start_order  clause being analyzed (ORDER BY, GROUP BY...)
8114   @param  tab          table
8115   @param  cached_eq_ref_tables  bitmap: bit Z is set if the table of map Z
8116   was already the subject of an eq_ref_table() call for the same clause; then
8117   the return value of this previous call can be found at bit Z of
8118   'eq_ref_tables'
8119   @param  eq_ref_tables see above.
8120 */
8121 
8122 static bool
eq_ref_table(JOIN * join,ORDER * start_order,JOIN_TAB * tab,table_map * cached_eq_ref_tables,table_map * eq_ref_tables)8123 eq_ref_table(JOIN *join, ORDER *start_order, JOIN_TAB *tab,
8124              table_map *cached_eq_ref_tables, table_map *eq_ref_tables)
8125 {
8126   /* We can skip const tables only if not an outer table */
8127   if (tab->type == JT_CONST && !tab->first_inner)
8128     return true;
8129   if (tab->type != JT_EQ_REF || tab->table->maybe_null)
8130     return false;
8131 
8132   const table_map map= tab->table->map;
8133   uint found= 0;
8134 
8135   for (Item **ref_item= tab->ref.items, **end= ref_item + tab->ref.key_parts ;
8136        ref_item != end ; ref_item++)
8137   {
8138     if (! (*ref_item)->const_item())
8139     {						// Not a const ref
8140       ORDER *order;
8141       for (order=start_order ; order ; order=order->next)
8142       {
8143 	if ((*ref_item)->eq(order->item[0],0))
8144 	  break;
8145       }
8146       if (order)
8147       {
8148         if (!(order->used & map))
8149         {
8150           found++;
8151           order->used|= map;
8152         }
8153 	continue;				// Used in ORDER BY
8154       }
8155       if (!only_eq_ref_tables(join, start_order, (*ref_item)->used_tables(),
8156                               cached_eq_ref_tables, eq_ref_tables))
8157         return false;
8158     }
8159   }
8160   /* Check that there was no reference to table before sort order */
8161   for (; found && start_order ; start_order=start_order->next)
8162   {
8163     if (start_order->used & map)
8164     {
8165       found--;
8166       continue;
8167     }
8168     if (start_order->depend_map & map)
8169       return false;
8170   }
8171   return true;
8172 }
8173 
8174 
8175 /// @see eq_ref_table()
8176 static bool
only_eq_ref_tables(JOIN * join,ORDER * order,table_map tables,table_map * cached_eq_ref_tables,table_map * eq_ref_tables)8177 only_eq_ref_tables(JOIN *join, ORDER *order, table_map tables,
8178                    table_map *cached_eq_ref_tables, table_map *eq_ref_tables)
8179 {
8180   tables&= ~PSEUDO_TABLE_BITS;
8181   for (JOIN_TAB **tab=join->map2table ; tables ; tab++, tables>>=1)
8182   {
8183     if (tables & 1)
8184     {
8185       const table_map map= (*tab)->table->map;
8186       bool is_eq_ref;
8187       if (*cached_eq_ref_tables & map) // then there exists a cached bit
8188         is_eq_ref= *eq_ref_tables & map;
8189       else
8190       {
8191         is_eq_ref= eq_ref_table(join, order, *tab,
8192                                 cached_eq_ref_tables, eq_ref_tables);
8193         if (is_eq_ref)
8194           *eq_ref_tables|= map;
8195         else
8196           *eq_ref_tables&= ~map;
8197         *cached_eq_ref_tables|= map; // now there exists a cached bit
8198       }
8199       if (!is_eq_ref)
8200         return false;
8201     }
8202   }
8203   return true;
8204 }
8205 
8206 /**
8207   Heuristic: Switch from 'ref' to 'range' access if 'range' access can utilize
8208   more keyparts than 'ref' access. Conditions for doing switching:
8209 
8210   1) 'ref' access depends on a constant, not a value read from a table earlier
8211       in the join sequence.
8212 
8213   Rationale: if 'ref' depends on a value from another table, the join condition
8214   is not used to limit the rows read by 'range' access (that would require
8215   dynamic range - 'Range checked for each record'). In other words, if 'ref'
8216   depends on a value from another table, we have a query with conditions of
8217   the form
8218     this_table.idx_col1 = other_table.col AND   <<- used by 'ref'
8219     this_table.idx_col1 OP <const> AND          <<- used by 'range'
8220     this_table.idx_col2 OP <const> AND ...      <<- used by 'range'
8221 
8222   and an index on (idx_col1,idx_col2,...). But the fact that 'range' access
8223   uses more keyparts does not mean that it is more selective than 'ref' access
8224   because these access types utilize different parts of the query condition. We
8225   therefore trust the cost based choice made by best_access_path() instead of
8226   forcing a heuristic choice here.
8227 
8228   2) Range access is possible, and it is less costly than table/index scan.
8229 
8230     3a) 'ref' access and 'range' access uses the same index.
8231     3b) 'range' access uses more keyparts than 'ref' access
8232 
8233     OR
8234 
8235     4) Ref has borrowed the index estimate from range and created a cost
8236        estimate (See Optimize_table_order::find_best_ref). This will be a
8237        problem if range built it's row estimate using a larger number of key
8238        parts than ref. In such a case, shift to range access over the same
8239        index. So run the range optimizer with that index as the only choice.
8240        (Condition 5 is not relevant here since it has been tested in
8241        find_best_ref.)
8242 
8243   @param thd THD      To re-run range optimizer.
8244   @param tab JOIN_TAB To check the above conditions.
8245 
8246   @return true   Range is better than ref
8247   @return false  Ref is better or switch isn't possible
8248 
8249   @todo: This decision should rather be made in best_access_path()
8250 */
can_switch_from_ref_to_range(THD * thd,JOIN_TAB * tab)8251 static bool can_switch_from_ref_to_range(THD *thd, JOIN_TAB *tab)
8252 {
8253   if (!tab->ref.depend_map &&                                          // 1)
8254       tab->quick)                                                      // 2)
8255   {
8256     if ((uint) tab->ref.key == tab->quick->index &&                    // 3a)
8257         tab->ref.key_length < tab->quick->max_used_key_length)         // 3b)
8258       return true;
8259     else if (tab->dodgy_ref_cost)                                      // 4)
8260     {
8261       int error;
8262       SQL_SELECT *select;
8263       JOIN *join= tab->join;
8264       select= make_select(tab->table, join->found_const_table_map,
8265                           join->found_const_table_map,
8266                           *tab->on_expr_ref ? *tab->on_expr_ref : join->conds,
8267                           1, &error);
8268 
8269       if (select)
8270       {
8271         Opt_trace_context * const trace= &thd->opt_trace;
8272         Opt_trace_object trace_wrapper(trace);
8273         Opt_trace_array
8274           trace_setup_cond(trace,
8275                            "rerunning_range_optimizer_for_single_index");
8276 
8277         key_map new_ref_key_map;
8278         new_ref_key_map.set_bit(tab->position->key->key);
8279         bool retcode= false;
8280         if (select->test_quick_select(thd, new_ref_key_map, 0,
8281                                       (join->select_options &
8282                                        OPTION_FOUND_ROWS ? HA_POS_ERROR :
8283                                        join->unit->select_limit_cnt),
8284                                       false,  // don't force quick range
8285                                       ORDER::ORDER_NOT_RELEVANT) > 0)
8286         {
8287           delete tab->quick;
8288           tab->quick= select->quick;
8289           retcode= true;
8290         }
8291         select->quick= 0;
8292         delete select;
8293         return retcode;
8294       }
8295     }
8296   }
8297   return false;
8298 }
8299 
8300 /**
8301   Check if an expression in ORDER BY or GROUP BY is a duplicate of a
8302   preceding expression.
8303 
8304   @param  first_order   the first expression in the ORDER BY or
8305                         GROUP BY clause
8306   @param  possible_dup  the expression that might be a duplicate of
8307                         another expression preceding it the ORDER BY
8308                         or GROUP BY clause
8309 
8310   @returns true if possible_dup is a duplicate, false otherwise
8311 */
duplicate_order(const ORDER * first_order,const ORDER * possible_dup)8312 static bool duplicate_order(const ORDER *first_order,
8313                             const ORDER *possible_dup)
8314 {
8315   const ORDER *order;
8316   for (order=first_order; order ; order=order->next)
8317   {
8318     if (order == possible_dup)
8319     {
8320       // all expressions preceding possible_dup have been checked.
8321       return false;
8322     }
8323     else
8324     {
8325       const Item *it1= order->item[0]->real_item();
8326       const Item *it2= possible_dup->item[0]->real_item();
8327 
8328       if (it1->type() == Item::FIELD_ITEM &&
8329           it2->type() == Item::FIELD_ITEM &&
8330           (static_cast<const Item_field*>(it1)->field ==
8331            static_cast<const Item_field*>(it2)->field))
8332       {
8333         return true;
8334       }
8335     }
8336   }
8337   return false;
8338 }
8339 
8340 /**
8341   Remove all constants and check if ORDER only contains simple
8342   expressions.
8343 
8344   simple_order is set to 1 if sort_order only uses fields from head table
8345   and the head table is not a LEFT JOIN table.
8346 
8347   @param join                   Join handler
8348   @param first_order            List of SORT or GROUP order
8349   @param cond                   WHERE statement
8350   @param change_list            Set to 1 if we should remove things from list.
8351                                 If this is not set, then only simple_order is
8352                                 calculated.
8353   @param simple_order           Set to 1 if we are only using simple expressions
8354   @param clause_type            "ORDER BY" etc for printing in optimizer trace
8355 
8356   @return
8357     Returns new sort order
8358 */
8359 
8360 static ORDER *
remove_const(JOIN * join,ORDER * first_order,Item * cond,bool change_list,bool * simple_order,const char * clause_type)8361 remove_const(JOIN *join,ORDER *first_order, Item *cond,
8362              bool change_list, bool *simple_order, const char *clause_type)
8363 {
8364   if (join->plan_is_const())
8365     return change_list ? 0 : first_order;		// No need to sort
8366 
8367   Opt_trace_context * const trace= &join->thd->opt_trace;
8368   Opt_trace_disable_I_S trace_disabled(trace, first_order == NULL);
8369   Opt_trace_object trace_wrapper(trace);
8370   Opt_trace_object trace_simpl(trace, "clause_processing");
8371   if (trace->is_started())
8372   {
8373     trace_simpl.add_alnum("clause", clause_type);
8374     String str;
8375     st_select_lex::print_order(&str, first_order,
8376                                enum_query_type(QT_TO_SYSTEM_CHARSET |
8377                                                QT_SHOW_SELECT_NUMBER |
8378                                                QT_NO_DEFAULT_DB));
8379     trace_simpl.add_utf8("original_clause", str.ptr(), str.length());
8380   }
8381   Opt_trace_array trace_each_item(trace, "items");
8382 
8383   ORDER *order,**prev_ptr;
8384   table_map first_table= join->join_tab[join->const_tables].table->map;
8385   table_map not_const_tables= ~join->const_table_map;
8386   table_map ref;
8387   // Caches to avoid repeating eq_ref_table() calls, @see eq_ref_table()
8388   table_map eq_ref_tables= 0, cached_eq_ref_tables= 0;
8389   DBUG_ENTER("remove_const");
8390 
8391   prev_ptr= &first_order;
8392   *simple_order= *join->join_tab[join->const_tables].on_expr_ref ? 0 : 1;
8393 
8394   /* NOTE: A variable of not_const_tables ^ first_table; breaks gcc 2.7 */
8395 
8396   update_depend_map(join, first_order);
8397   for (order=first_order; order ; order=order->next)
8398   {
8399     Opt_trace_object trace_one_item(trace);
8400     trace_one_item.add("item", order->item[0]);
8401     table_map order_tables=order->item[0]->used_tables();
8402     if (order->item[0]->with_sum_func ||
8403         /*
8404           If the outer table of an outer join is const (either by itself or
8405           after applying WHERE condition), grouping on a field from such a
8406           table will be optimized away and filesort without temporary table
8407           will be used unless we prevent that now. Filesort is not fit to
8408           handle joins and the join condition is not applied. We can't detect
8409           the case without an expensive test, however, so we force temporary
8410           table for all queries containing more than one table, ROLLUP, and an
8411           outer join.
8412          */
8413         (join->primary_tables > 1 &&
8414          join->rollup.state == ROLLUP::STATE_INITED &&
8415          join->outer_join))
8416       *simple_order=0;				// Must do a temp table to sort
8417     else if (!(order_tables & not_const_tables))
8418     {
8419       if (order->item[0]->has_subquery() &&
8420           !(join->select_lex->options & SELECT_DESCRIBE))
8421       {
8422         Opt_trace_array trace_subselect(trace, "subselect_evaluation");
8423         order->item[0]->val_str(&order->item[0]->str_value);
8424       }
8425       trace_one_item.add("uses_only_constant_tables", true);
8426       continue;					// skip const item
8427     }
8428     else if (duplicate_order(first_order, order))
8429     {
8430       /*
8431         If 'order' is a duplicate of an expression earlier in the
8432         ORDER/GROUP BY sequence, it can be removed from the ORDER BY
8433         or GROUP BY clause.
8434       */
8435       trace_one_item.add("duplicate_item", true);
8436       continue;
8437     }
8438     else if (order->in_field_list && order->item[0]->has_subquery())
8439       /*
8440         If the order item is a subquery that is also in the field
8441         list, a temp table should be used to avoid evaluating the
8442         subquery for each row both when a) creating a sort index and
8443         b) getting the value.
8444           Example: "SELECT (SELECT ... ) as a ... GROUP BY a;"
8445        */
8446       *simple_order= false;
8447     else
8448     {
8449       if (order_tables & (RAND_TABLE_BIT | OUTER_REF_TABLE_BIT))
8450 	*simple_order=0;
8451       else
8452       {
8453 	if (cond && const_expression_in_where(cond,order->item[0]))
8454 	{
8455           trace_one_item.add("equals_constant_in_where", true);
8456 	  continue;
8457 	}
8458 	if ((ref=order_tables & (not_const_tables ^ first_table)))
8459 	{
8460 	  if (!(order_tables & first_table) &&
8461               only_eq_ref_tables(join, first_order, ref,
8462                                  &cached_eq_ref_tables, &eq_ref_tables))
8463 	  {
8464             trace_one_item.add("eq_ref_to_preceding_items", true);
8465 	    continue;
8466 	  }
8467 	  *simple_order=0;			// Must do a temp table to sort
8468 	}
8469       }
8470     }
8471     if (change_list)
8472       *prev_ptr= order;				// use this entry
8473     prev_ptr= &order->next;
8474   }
8475   if (change_list)
8476     *prev_ptr=0;
8477   if (prev_ptr == &first_order)			// Nothing to sort/group
8478     *simple_order=1;
8479   DBUG_PRINT("exit",("simple_order: %d",(int) *simple_order));
8480 
8481   trace_each_item.end();
8482   trace_simpl.add("resulting_clause_is_simple", *simple_order);
8483   if (trace->is_started() && change_list)
8484   {
8485     String str;
8486     st_select_lex::print_order(&str, first_order,
8487                                enum_query_type(QT_TO_SYSTEM_CHARSET |
8488                                                QT_SHOW_SELECT_NUMBER |
8489                                                QT_NO_DEFAULT_DB));
8490     trace_simpl.add_utf8("resulting_clause", str.ptr(), str.length());
8491   }
8492 
8493   DBUG_RETURN(first_order);
8494 }
8495 
8496 
8497 /**
8498   Optimize conditions by
8499 
8500      a) applying transitivity to build multiple equality predicates
8501         (MEP): if x=y and y=z the MEP x=y=z is built.
8502      b) apply constants where possible. If the value of x is known to be
8503         42, x is replaced with a constant of value 42. By transitivity, this
8504         also applies to MEPs, so the MEP in a) will become 42=x=y=z.
8505      c) remove conditions that are impossible or always true
8506 
8507   @param      join         pointer to the structure providing all context info
8508                            for the query
8509   @param      conds        conditions to optimize
8510   @param      join_list    list of join tables to which the condition
8511                            refers to
8512   @param[out] cond_value   Not changed if conds was empty
8513                            COND_TRUE if conds is always true
8514                            COND_FALSE if conds is impossible
8515                            COND_OK otherwise
8516 
8517   @return optimized conditions
8518 */
8519 Item *
optimize_cond(THD * thd,Item * conds,COND_EQUAL ** cond_equal,List<TABLE_LIST> * join_list,bool build_equalities,Item::cond_result * cond_value)8520 optimize_cond(THD *thd, Item *conds, COND_EQUAL **cond_equal,
8521               List<TABLE_LIST> *join_list,
8522               bool build_equalities, Item::cond_result *cond_value)
8523 {
8524   Opt_trace_context * const trace= &thd->opt_trace;
8525   DBUG_ENTER("optimize_cond");
8526 
8527   if (conds)
8528   {
8529     Opt_trace_object trace_wrapper(trace);
8530     Opt_trace_object trace_cond(trace, "condition_processing");
8531     trace_cond.add_alnum("condition", build_equalities ? "WHERE" : "HAVING");
8532     trace_cond.add("original_condition", conds);
8533     Opt_trace_array trace_steps(trace, "steps");
8534 
8535     /*
8536       Build all multiple equality predicates and eliminate equality
8537       predicates that can be inferred from these multiple equalities.
8538       For each reference of a field included into a multiple equality
8539       that occurs in a function set a pointer to the multiple equality
8540       predicate. Substitute a constant instead of this field if the
8541       multiple equality contains a constant.
8542     */
8543     if (build_equalities)
8544     {
8545       Opt_trace_object step_wrapper(trace);
8546       step_wrapper.add_alnum("transformation", "equality_propagation");
8547       {
8548         Opt_trace_disable_I_S
8549           disable_trace_wrapper(trace, !conds->has_subquery());
8550         Opt_trace_array
8551           trace_subselect(trace, "subselect_evaluation");
8552         conds= build_equal_items(thd, conds, NULL, true,
8553                                  join_list, cond_equal);
8554       }
8555       step_wrapper.add("resulting_condition", conds);
8556     }
8557 
8558     /* change field = field to field = const for each found field = const */
8559     {
8560       Opt_trace_object step_wrapper(trace);
8561       step_wrapper.add_alnum("transformation", "constant_propagation");
8562       {
8563         Opt_trace_disable_I_S
8564           disable_trace_wrapper(trace, !conds->has_subquery());
8565         Opt_trace_array
8566           trace_subselect(trace, "subselect_evaluation");
8567         propagate_cond_constants(thd, (I_List<COND_CMP> *) 0, conds, conds);
8568       }
8569       step_wrapper.add("resulting_condition", conds);
8570     }
8571 
8572     /*
8573       Remove all instances of item == item
8574       Remove all and-levels where CONST item != CONST item
8575     */
8576     DBUG_EXECUTE("where",print_where(conds,"after const change", QT_ORDINARY););
8577     {
8578       Opt_trace_object step_wrapper(trace);
8579       step_wrapper.add_alnum("transformation", "trivial_condition_removal");
8580       {
8581         Opt_trace_disable_I_S
8582           disable_trace_wrapper(trace, !conds->has_subquery());
8583         Opt_trace_array trace_subselect(trace, "subselect_evaluation");
8584         conds= remove_eq_conds(thd, conds, cond_value) ;
8585       }
8586       step_wrapper.add("resulting_condition", conds);
8587     }
8588   }
8589   DBUG_RETURN(conds);
8590 }
8591 
8592 
8593 /**
8594   Handles the reqursive job for remove_eq_conds()
8595 
8596   Remove const and eq items. Return new item, or NULL if no condition
8597   cond_value is set to according:
8598   COND_OK    query is possible (field = constant)
8599   COND_TRUE  always true	( 1 = 1 )
8600   COND_FALSE always false	( 1 = 2 )
8601 
8602   SYNPOSIS
8603     remove_eq_conds()
8604     thd 			THD environment
8605     cond                        the condition to handle. Note that cond
8606                                 is changed by this function
8607     cond_value                  the resulting value of the condition
8608 
8609   RETURN
8610     *Item with the simplified condition
8611 */
8612 
8613 static Item *
internal_remove_eq_conds(THD * thd,Item * cond,Item::cond_result * cond_value)8614 internal_remove_eq_conds(THD *thd, Item *cond, Item::cond_result *cond_value)
8615 {
8616   if (cond->type() == Item::COND_ITEM)
8617   {
8618     bool and_level= ((Item_cond*) cond)->functype()
8619       == Item_func::COND_AND_FUNC;
8620     List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
8621     Item::cond_result tmp_cond_value;
8622     bool should_fix_fields=0;
8623 
8624     *cond_value=Item::COND_UNDEF;
8625     Item *item;
8626     while ((item=li++))
8627     {
8628       Item *new_item=internal_remove_eq_conds(thd, item, &tmp_cond_value);
8629       if (!new_item)
8630 	li.remove();
8631       else if (item != new_item)
8632       {
8633 	(void) li.replace(new_item);
8634 	should_fix_fields=1;
8635       }
8636       if (*cond_value == Item::COND_UNDEF)
8637 	*cond_value=tmp_cond_value;
8638       switch (tmp_cond_value) {
8639       case Item::COND_OK:			// Not TRUE or FALSE
8640 	if (and_level || *cond_value == Item::COND_FALSE)
8641 	  *cond_value=tmp_cond_value;
8642 	break;
8643       case Item::COND_FALSE:
8644 	if (and_level)
8645 	{
8646 	  *cond_value=tmp_cond_value;
8647 	  return (Item*) 0;			// Always false
8648 	}
8649 	break;
8650       case Item::COND_TRUE:
8651 	if (!and_level)
8652 	{
8653 	  *cond_value= tmp_cond_value;
8654 	  return (Item*) 0;			// Always true
8655 	}
8656 	break;
8657       case Item::COND_UNDEF:			// Impossible
8658 	break; /* purecov: deadcode */
8659       }
8660     }
8661     if (should_fix_fields)
8662       cond->update_used_tables();
8663 
8664     if (!((Item_cond*) cond)->argument_list()->elements ||
8665 	*cond_value != Item::COND_OK)
8666       return (Item*) 0;
8667     if (((Item_cond*) cond)->argument_list()->elements == 1)
8668     {
8669       /*
8670         BUG#11765699:
8671         We're dealing with an AND or OR item that has only one
8672         argument. However, it is not an option to empty the list
8673         because:
8674 
8675          - this function is called for either JOIN::conds or
8676            JOIN::having, but these point to the same condition as
8677            SELECT_LEX::where and SELECT_LEX::having do.
8678 
8679          - The return value of remove_eq_conds() is assigned to
8680            JOIN::conds and JOIN::having, so emptying the list and
8681            returning the only remaining item "replaces" the AND or OR
8682            with item for the variables in JOIN. However, the return
8683            value is not assigned to the SELECT_LEX counterparts. Thus,
8684            if argument_list is emptied, SELECT_LEX forgets the item in
8685            argument_list()->head().
8686 
8687         item is therefore returned, but argument_list is not emptied.
8688       */
8689       item= ((Item_cond*) cond)->argument_list()->head();
8690       /*
8691         Consider reenabling the line below when the optimizer has been
8692         split into properly separated phases.
8693 
8694         ((Item_cond*) cond)->argument_list()->empty();
8695       */
8696       return item;
8697     }
8698   }
8699   else if (cond->type() == Item::FUNC_ITEM &&
8700 	   ((Item_func*) cond)->functype() == Item_func::ISNULL_FUNC)
8701   {
8702     Item_func_isnull *func=(Item_func_isnull*) cond;
8703     Item **args= func->arguments();
8704     if (args[0]->type() == Item::FIELD_ITEM)
8705     {
8706       Field *field=((Item_field*) args[0])->field;
8707       /* fix to replace 'NULL' dates with '0' (shreeve@uci.edu) */
8708       /*
8709         See BUG#12594011
8710         Documentation says that
8711         SELECT datetime_notnull d FROM t1 WHERE d IS NULL
8712         shall return rows where d=='0000-00-00'
8713 
8714         Thus, for DATE and DATETIME columns defined as NOT NULL,
8715         "date_notnull IS NULL" has to be modified to
8716         "date_notnull IS NULL OR date_notnull == 0" (if outer join)
8717         "date_notnull == 0"                         (otherwise)
8718 
8719       */
8720       if (((field->type() == MYSQL_TYPE_DATE) ||
8721            (field->type() == MYSQL_TYPE_DATETIME)) &&
8722           (field->flags & NOT_NULL_FLAG))
8723       {
8724         Item *item0= new(thd->mem_root) Item_int((longlong)0, 1);
8725         Item *eq_cond= new(thd->mem_root) Item_func_eq(args[0], item0);
8726         if (!eq_cond)
8727           return cond;
8728 
8729         if (args[0]->is_outer_field())
8730         {
8731           // outer join: transform "col IS NULL" to "col IS NULL or col=0"
8732           Item *or_cond= new(thd->mem_root) Item_cond_or(eq_cond, cond);
8733           if (!or_cond)
8734             return cond;
8735           cond= or_cond;
8736         }
8737         else
8738         {
8739           // not outer join: transform "col IS NULL" to "col=0"
8740           cond= eq_cond;
8741         }
8742 
8743         cond->fix_fields(thd, &cond);
8744       }
8745     }
8746     if (cond->const_item())
8747     {
8748       *cond_value= eval_const_cond(cond) ? Item::COND_TRUE : Item::COND_FALSE;
8749       return (Item*) 0;
8750     }
8751   }
8752   else if (cond->const_item() && !cond->is_expensive())
8753   {
8754     *cond_value= eval_const_cond(cond) ? Item::COND_TRUE : Item::COND_FALSE;
8755     return (Item*) 0;
8756   }
8757   else if ((*cond_value= cond->eq_cmp_result()) != Item::COND_OK)
8758   {						// boolan compare function
8759     Item *left_item=	((Item_func*) cond)->arguments()[0];
8760     Item *right_item= ((Item_func*) cond)->arguments()[1];
8761     if (left_item->eq(right_item,1))
8762     {
8763       if (!left_item->maybe_null ||
8764 	  ((Item_func*) cond)->functype() == Item_func::EQUAL_FUNC)
8765 	return (Item*) 0;			// Compare of identical items
8766     }
8767   }
8768   *cond_value=Item::COND_OK;
8769   return cond;					// Point at next and level
8770 }
8771 
8772 
8773 /**
8774   Remove const and eq items. Return new item, or NULL if no condition
8775   cond_value is set to according:
8776   COND_OK    query is possible (field = constant)
8777   COND_TRUE  always true	( 1 = 1 )
8778   COND_FALSE always false	( 1 = 2 )
8779 
8780   SYNPOSIS
8781     remove_eq_conds()
8782     thd 			THD environment
8783     cond                        the condition to handle
8784     cond_value                  the resulting value of the condition
8785 
8786   NOTES
8787     calls the inner_remove_eq_conds to check all the tree reqursively
8788 
8789   RETURN
8790     *Item with the simplified condition
8791 */
8792 
8793 Item *
remove_eq_conds(THD * thd,Item * cond,Item::cond_result * cond_value)8794 remove_eq_conds(THD *thd, Item *cond, Item::cond_result *cond_value)
8795 {
8796   if (cond->type() == Item::FUNC_ITEM &&
8797       ((Item_func*) cond)->functype() == Item_func::ISNULL_FUNC)
8798   {
8799     /*
8800       Handles this special case for some ODBC applications:
8801       The are requesting the row that was just updated with a auto_increment
8802       value with this construct:
8803 
8804       SELECT * from table_name where auto_increment_column IS NULL
8805       This will be changed to:
8806       SELECT * from table_name where auto_increment_column = LAST_INSERT_ID
8807     */
8808 
8809     Item_func_isnull *func=(Item_func_isnull*) cond;
8810     Item **args= func->arguments();
8811     if (args[0]->type() == Item::FIELD_ITEM)
8812     {
8813       Field *field=((Item_field*) args[0])->field;
8814       if (field->flags & AUTO_INCREMENT_FLAG && !field->table->maybe_null &&
8815 	  (thd->variables.option_bits & OPTION_AUTO_IS_NULL) &&
8816 	  (thd->first_successful_insert_id_in_prev_stmt > 0 &&
8817            thd->substitute_null_with_insert_id))
8818       {
8819 #ifdef HAVE_QUERY_CACHE
8820 	query_cache_abort(&thd->query_cache_tls);
8821 #endif
8822 	Item *new_cond;
8823 	if ((new_cond= new Item_func_eq(args[0],
8824 					new Item_int(NAME_STRING("last_insert_id()"),
8825                                                      thd->read_first_successful_insert_id_in_prev_stmt(),
8826                                                      MY_INT64_NUM_DECIMAL_DIGITS))))
8827 	{
8828 	  cond=new_cond;
8829           /*
8830             Item_func_eq can't be fixed after creation so we do not check
8831             cond->fixed, also it do not need tables so we use 0 as second
8832             argument.
8833           */
8834 	  cond->fix_fields(thd, &cond);
8835 	}
8836         /*
8837           IS NULL should be mapped to LAST_INSERT_ID only for first row, so
8838           clear for next row
8839         */
8840         thd->substitute_null_with_insert_id= FALSE;
8841 
8842         *cond_value= Item::COND_OK;
8843         return cond;
8844       }
8845     }
8846   }
8847   return internal_remove_eq_conds(thd, cond, cond_value); // Scan all the condition
8848 }
8849 
8850 
8851 /**
8852   Check if GROUP BY/DISTINCT can be optimized away because the set is
8853   already known to be distinct.
8854 
8855   Used in removing the GROUP BY/DISTINCT of the following types of
8856   statements:
8857   @code
8858     SELECT [DISTINCT] <unique_key_cols>... FROM <single_table_ref>
8859       [GROUP BY <unique_key_cols>,...]
8860   @endcode
8861 
8862     If (a,b,c is distinct)
8863     then <any combination of a,b,c>,{whatever} is also distinct
8864 
8865     This function checks if all the key parts of any of the unique keys
8866     of the table are referenced by a list : either the select list
8867     through find_field_in_item_list or GROUP BY list through
8868     find_field_in_order_list.
8869     If the above holds and the key parts cannot contain NULLs then we
8870     can safely remove the GROUP BY/DISTINCT,
8871     as no result set can be more distinct than an unique key.
8872 
8873   @param tab                  The join table to operate on.
8874   @param find_func            function to iterate over the list and search
8875                               for a field
8876 
8877   @retval
8878     1                    found
8879   @retval
8880     0                    not found.
8881 
8882   @note
8883     The function assumes that make_outerjoin_info() has been called in
8884     order for the check for outer tables to work.
8885 */
8886 
8887 static bool
list_contains_unique_index(JOIN_TAB * tab,bool (* find_func)(Field *,void *),void * data)8888 list_contains_unique_index(JOIN_TAB *tab,
8889                           bool (*find_func) (Field *, void *), void *data)
8890 {
8891   TABLE *table= tab->table;
8892 
8893   if (tab->is_inner_table_of_outer_join())
8894     return 0;
8895   for (uint keynr= 0; keynr < table->s->keys; keynr++)
8896   {
8897     if (keynr == table->s->primary_key ||
8898          (table->key_info[keynr].flags & HA_NOSAME))
8899     {
8900       KEY *keyinfo= table->key_info + keynr;
8901       KEY_PART_INFO *key_part, *key_part_end;
8902 
8903       for (key_part=keyinfo->key_part,
8904            key_part_end=key_part+ keyinfo->user_defined_key_parts;
8905            key_part < key_part_end;
8906            key_part++)
8907       {
8908         if (key_part->field->real_maybe_null() ||
8909             !find_func(key_part->field, data))
8910           break;
8911       }
8912       if (key_part == key_part_end)
8913         return 1;
8914     }
8915   }
8916   return 0;
8917 }
8918 
8919 
8920 /**
8921   Helper function for list_contains_unique_index.
8922   Find a field reference in a list of ORDER structures.
8923   Finds a direct reference of the Field in the list.
8924 
8925   @param field                The field to search for.
8926   @param data                 ORDER *.The list to search in
8927 
8928   @retval
8929     1                    found
8930   @retval
8931     0                    not found.
8932 */
8933 
8934 static bool
find_field_in_order_list(Field * field,void * data)8935 find_field_in_order_list (Field *field, void *data)
8936 {
8937   ORDER *group= (ORDER *) data;
8938   bool part_found= 0;
8939   for (ORDER *tmp_group= group; tmp_group; tmp_group=tmp_group->next)
8940   {
8941     Item *item= (*tmp_group->item)->real_item();
8942     if (item->type() == Item::FIELD_ITEM &&
8943         ((Item_field*) item)->field->eq(field))
8944     {
8945       part_found= 1;
8946       break;
8947     }
8948   }
8949   return part_found;
8950 }
8951 
8952 
8953 /**
8954   Helper function for list_contains_unique_index.
8955   Find a field reference in a dynamic list of Items.
8956   Finds a direct reference of the Field in the list.
8957 
8958   @param[in] field             The field to search for.
8959   @param[in] data              List<Item> *.The list to search in
8960 
8961   @retval
8962     1                    found
8963   @retval
8964     0                    not found.
8965 */
8966 
8967 static bool
find_field_in_item_list(Field * field,void * data)8968 find_field_in_item_list (Field *field, void *data)
8969 {
8970   List<Item> *fields= (List<Item> *) data;
8971   bool part_found= 0;
8972   List_iterator<Item> li(*fields);
8973   Item *item;
8974 
8975   while ((item= li++))
8976   {
8977     if (item->type() == Item::FIELD_ITEM &&
8978         ((Item_field*) item)->field->eq(field))
8979     {
8980       part_found= 1;
8981       break;
8982     }
8983   }
8984   return part_found;
8985 }
8986 
8987 
8988 /**
8989   Create a group by that consist of all non const fields.
8990 
8991   Try to use the fields in the order given by 'order' to allow one to
8992   optimize away 'order by'.
8993 */
8994 
8995 static ORDER *
create_distinct_group(THD * thd,Ref_ptr_array ref_pointer_array,ORDER * order_list,List<Item> & fields,List<Item> & all_fields,bool * all_order_by_fields_used)8996 create_distinct_group(THD *thd, Ref_ptr_array ref_pointer_array,
8997                       ORDER *order_list, List<Item> &fields,
8998                       List<Item> &all_fields,
8999 		      bool *all_order_by_fields_used)
9000 {
9001   List_iterator<Item> li(fields);
9002   Item *item;
9003   Ref_ptr_array orig_ref_pointer_array= ref_pointer_array;
9004   ORDER *order,*group,**prev;
9005 
9006   *all_order_by_fields_used= 1;
9007   while ((item=li++))
9008     item->marker=0;			/* Marker that field is not used */
9009 
9010   prev= &group;  group=0;
9011   for (order=order_list ; order; order=order->next)
9012   {
9013     if (order->in_field_list)
9014     {
9015       ORDER *ord=(ORDER*) thd->memdup((char*) order,sizeof(ORDER));
9016       if (!ord)
9017 	return 0;
9018       *prev=ord;
9019       prev= &ord->next;
9020       (*ord->item)->marker=1;
9021     }
9022     else
9023       *all_order_by_fields_used= 0;
9024   }
9025 
9026   li.rewind();
9027   while ((item=li++))
9028   {
9029     if (!item->const_item() && !item->with_sum_func && !item->marker)
9030     {
9031       /*
9032         Don't put duplicate columns from the SELECT list into the
9033         GROUP BY list.
9034       */
9035       ORDER *ord_iter;
9036       for (ord_iter= group; ord_iter; ord_iter= ord_iter->next)
9037         if ((*ord_iter->item)->eq(item, 1))
9038           goto next_item;
9039 
9040       ORDER *ord=(ORDER*) thd->calloc(sizeof(ORDER));
9041       if (!ord)
9042 	return 0;
9043 
9044       if (item->type() == Item::FIELD_ITEM &&
9045           item->field_type() == MYSQL_TYPE_BIT)
9046       {
9047         /*
9048           Because HEAP tables can't index BIT fields we need to use an
9049           additional hidden field for grouping because later it will be
9050           converted to a LONG field. Original field will remain of the
9051           BIT type and will be returned to a client.
9052           @note setup_ref_array() needs to account for the extra space.
9053         */
9054         Item_field *new_item= new Item_field(thd, (Item_field*)item);
9055         int el= all_fields.elements;
9056         orig_ref_pointer_array[el]= new_item;
9057         all_fields.push_front(new_item);
9058         ord->item= &orig_ref_pointer_array[el];
9059       }
9060       else
9061       {
9062         /*
9063           We have here only field_list (not all_field_list), so we can use
9064           simple indexing of ref_pointer_array (order in the array and in the
9065           list are same)
9066         */
9067         ord->item= &ref_pointer_array[0];
9068       }
9069       ord->direction= ORDER::ORDER_ASC;
9070       *prev=ord;
9071       prev= &ord->next;
9072     }
9073 next_item:
9074     ref_pointer_array.pop_front();
9075   }
9076   *prev=0;
9077   return group;
9078 }
9079 
9080 
9081 /**
9082   Return table number if there is only one table in sort order
9083   and group and order is compatible, else return 0.
9084 */
9085 
9086 static TABLE *
get_sort_by_table(ORDER * a,ORDER * b,TABLE_LIST * tables)9087 get_sort_by_table(ORDER *a,ORDER *b,TABLE_LIST *tables)
9088 {
9089   table_map map= (table_map) 0;
9090   DBUG_ENTER("get_sort_by_table");
9091 
9092   if (!a)
9093     a=b;					// Only one need to be given
9094   else if (!b)
9095     b=a;
9096 
9097   for (; a && b; a=a->next,b=b->next)
9098   {
9099     if (!(*a->item)->eq(*b->item,1))
9100       DBUG_RETURN(0);
9101     map|=a->item[0]->used_tables();
9102   }
9103   map&= ~PARAM_TABLE_BIT;
9104   if (!map || (map & (RAND_TABLE_BIT | OUTER_REF_TABLE_BIT)))
9105     DBUG_RETURN(0);
9106 
9107   for (; !(map & tables->table->map); tables= tables->next_leaf) ;
9108   if (map != tables->table->map)
9109     DBUG_RETURN(0);				// More than one table
9110   DBUG_PRINT("exit",("sort by table: %d",tables->table->tablenr));
9111   DBUG_RETURN(tables->table);
9112 }
9113 
9114 
9115 /**
9116   Create a condition for a const reference for a table.
9117 
9118   @param thd      THD pointer
9119   @param join_tab pointer to the table
9120 
9121   @return A pointer to the created condition for the const reference.
9122   @retval !NULL if the condition was created successfully
9123   @retval NULL if an error has occured
9124 */
9125 
create_cond_for_const_ref(THD * thd,JOIN_TAB * join_tab)9126 static Item_cond_and *create_cond_for_const_ref(THD *thd, JOIN_TAB *join_tab)
9127 {
9128   DBUG_ENTER("create_cond_for_const_ref");
9129   DBUG_ASSERT(join_tab->ref.key_parts);
9130 
9131   TABLE *table= join_tab->table;
9132   Item_cond_and *cond= new Item_cond_and();
9133   if (!cond)
9134     DBUG_RETURN(NULL);
9135 
9136   for (uint i=0 ; i < join_tab->ref.key_parts ; i++)
9137   {
9138     Field *field= table->field[table->key_info[join_tab->ref.key].key_part[i].
9139                                fieldnr-1];
9140     Item *value= join_tab->ref.items[i];
9141     Item *item= new Item_field(field);
9142     if (!item)
9143       DBUG_RETURN(NULL);
9144     item= join_tab->ref.null_rejecting & ((key_part_map)1 << i) ?
9145             (Item *)new Item_func_eq(item, value) :
9146             (Item *)new Item_func_equal(item, value);
9147     if (!item)
9148       DBUG_RETURN(NULL);
9149     if (cond->add(item))
9150       DBUG_RETURN(NULL);
9151   }
9152   cond->fix_fields(thd, (Item**)&cond);
9153 
9154   DBUG_RETURN(cond);
9155 }
9156 
9157 /**
9158   Create a condition for a const reference and add this to the
9159   currenct select for the table.
9160 */
9161 
add_ref_to_table_cond(THD * thd,JOIN_TAB * join_tab)9162 static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab)
9163 {
9164   DBUG_ENTER("add_ref_to_table_cond");
9165   if (!join_tab->ref.key_parts)
9166     DBUG_RETURN(FALSE);
9167 
9168   int error= 0;
9169 
9170   /* Create a condition representing the const reference. */
9171   Item_cond_and *cond= create_cond_for_const_ref(thd, join_tab);
9172   if (!cond)
9173     DBUG_RETURN(TRUE);
9174 
9175   /* Add this condition to the existing select condtion */
9176   if (join_tab->select)
9177   {
9178     if (join_tab->select->cond)
9179     {
9180       error=(int) cond->add(join_tab->select->cond);
9181       cond->update_used_tables();
9182     }
9183     join_tab->set_jt_and_sel_condition(cond, __LINE__);
9184   }
9185   else if ((join_tab->select= make_select(join_tab->table, 0, 0, cond, 0,
9186                                           &error)))
9187     join_tab->set_condition(cond, __LINE__);
9188 
9189   if (join_tab->select)
9190     Opt_trace_object(&thd->opt_trace).add("added_back_ref_condition", cond);
9191   /*
9192     If we have pushed parts of the select condition down to the
9193     storage engine we also need to add the condition for the const
9194     reference to the pre_idx_push_cond since this might be used
9195     later (in test_if_skip_sort_order()) instead of the condition.
9196   */
9197   if (join_tab->pre_idx_push_cond)
9198   {
9199     cond= create_cond_for_const_ref(thd, join_tab);
9200     if (!cond)
9201       DBUG_RETURN(TRUE);
9202     if (cond->add(join_tab->pre_idx_push_cond))
9203       DBUG_RETURN(TRUE);
9204     join_tab->pre_idx_push_cond = cond;
9205   }
9206 
9207   DBUG_RETURN(error ? TRUE : FALSE);
9208 }
9209 
9210 
9211 /**
9212   Remove additional condition inserted by IN/ALL/ANY transformation.
9213 
9214   @param conds   condition for processing
9215 
9216   @return
9217     new conditions
9218 
9219   @note that this function has Bug#13915291.
9220 */
9221 
remove_additional_cond(Item * conds)9222 static Item *remove_additional_cond(Item* conds)
9223 {
9224   // Because it uses in_additional_cond it applies only to the scalar case.
9225   if (conds->item_name.ptr() == in_additional_cond)
9226     return 0;
9227   if (conds->type() == Item::COND_ITEM)
9228   {
9229     Item_cond *cnd= (Item_cond*) conds;
9230     List_iterator<Item> li(*(cnd->argument_list()));
9231     Item *item;
9232     while ((item= li++))
9233     {
9234       if (item->item_name.ptr() == in_additional_cond)
9235       {
9236 	li.remove();
9237 	if (cnd->argument_list()->elements == 1)
9238 	  return cnd->argument_list()->head();
9239 	return conds;
9240       }
9241     }
9242   }
9243   return conds;
9244 }
9245 
9246 
9247 /*
9248   Index lookup-based subquery: save some flags for EXPLAIN output
9249 
9250   SYNOPSIS
9251     save_index_subquery_explain_info()
9252       join_tab  Subquery's join tab (there is only one as index lookup is
9253                 only used for subqueries that are single-table SELECTs)
9254       where     Subquery's WHERE clause
9255 
9256   DESCRIPTION
9257     For index lookup-based subquery (subselect_indexsubquery_engine),
9258     check its EXPLAIN output row should contain
9259       "Using index" (TAB_INFO_FULL_SCAN_ON_NULL)
9260       "Using Where" (TAB_INFO_USING_WHERE)
9261       "Full scan on NULL key" (TAB_INFO_FULL_SCAN_ON_NULL)
9262     and set appropriate flags in join_tab->packed_info.
9263 
9264   TODO:
9265     packed_info causes duplication in EXPLAIN code. For example, we print
9266     "using where" in 2 places of EXPLAIN code: if tab->condition(), OR if
9267     'packed_info & TAB_INFO_USING_WHERE'.
9268     indexsubquery_engine is the only user of
9269     save_index_subquery_explain_info().
9270     packed_info is almost useless today, it would be good to get rid of it
9271     (and thus of save_index_subquery_explain_info()).
9272 */
9273 
save_index_subquery_explain_info(JOIN_TAB * join_tab,Item * where)9274 static void save_index_subquery_explain_info(JOIN_TAB *join_tab, Item* where)
9275 {
9276   join_tab->packed_info= TAB_INFO_HAVE_VALUE;
9277 
9278   /*
9279     This is actually not needed, 'non-packed-info' branch of EXPLAIN naturally
9280     reads covering_keys and produces the desired 'Using index'
9281   */
9282   if (join_tab->table->covering_keys.is_set(join_tab->ref.key))
9283     join_tab->packed_info |= TAB_INFO_USING_INDEX;
9284 
9285   /*
9286     This is needed, because 'where' (==join->conds) may be NULL, or
9287     shorter than select->cond/tab->condition(), due to
9288     remove_subq_pushed_predicates() and remove_additional_cond(); the real
9289     condition which will be checked for each row is
9290     indexsubquery_engine::cond (==join->conds).
9291     Still this should be solvable without TAB_INFO_USING_WHERE.
9292   */
9293   if (where)
9294     join_tab->packed_info |= TAB_INFO_USING_WHERE;
9295 
9296   /*
9297     This is actually not needed, 'non-packed-info' branch of EXPLAIN naturally
9298     reads has_guarded_conds() and produces the desired 'Full scan on NULL
9299     key'.
9300   */
9301   if (join_tab->has_guarded_conds())
9302     join_tab->packed_info|= TAB_INFO_FULL_SCAN_ON_NULL;
9303 }
9304 
9305 
9306 /**
9307   Update some values in keyuse for faster choose_table_order() loop.
9308 */
9309 
optimize_keyuse(JOIN * join,Key_use_array * keyuse_array)9310 static void optimize_keyuse(JOIN *join, Key_use_array *keyuse_array)
9311 {
9312   for (size_t ix= 0; ix < keyuse_array->size(); ++ix)
9313   {
9314     Key_use *keyuse= &keyuse_array->at(ix);
9315     table_map map;
9316     /*
9317       If we find a ref, assume this table matches a proportional
9318       part of this table.
9319       For example 100 records matching a table with 5000 records
9320       gives 5000/100 = 50 records per key
9321       Constant tables are ignored.
9322       To avoid bad matches, we don't make ref_table_rows less than 100.
9323     */
9324     keyuse->ref_table_rows= ~(ha_rows) 0;	// If no ref
9325     if (keyuse->used_tables &
9326 	(map= (keyuse->used_tables & ~join->const_table_map &
9327 	       ~OUTER_REF_TABLE_BIT)))
9328     {
9329       uint tablenr;
9330       for (tablenr=0 ; ! (map & 1) ; map>>=1, tablenr++) ;
9331       if (map == 1)			// Only one table
9332       {
9333 	TABLE *tmp_table= join->join_tab[tablenr].table;
9334 	keyuse->ref_table_rows= max<ha_rows>(tmp_table->file->stats.records, 100);
9335       }
9336     }
9337     /*
9338       Outer reference (external field) is constant for single executing
9339       of subquery
9340     */
9341     if (keyuse->used_tables == OUTER_REF_TABLE_BIT)
9342       keyuse->ref_table_rows= 1;
9343   }
9344 }
9345 
9346 
/**
  Optimize a single-table query that is accessed through a full-text
  search (JT_FT) result.

  Nothing is done unless there is at most one primary table, the first
  join tab uses JT_FT access, and the table's handler reports
  HA_CAN_FULLTEXT_EXT.

  Two things may then happen:
   - If the only ORDER BY element is descending and equal to the MATCH
     expression used for access, the ordering is dropped when the MATCH
     item reports an ordered result. If min_ft_matches is set and exceeds
     the number of matches, the access is instead changed to a table scan
     and the function returns.
   - If every item of all_fields is either the FTS document id column
     (and the document id is part of the FTS result) or a MATCH
     expression equal to the one used for access, and there is neither
     ORDER BY nor GROUP BY left, the table is marked for key-read only.

  @see optimize_fts_limit_query
*/
void JOIN::optimize_fts_query()
{
  if (primary_tables > 1)
    return;    // We only optimize single table FTS queries

  JOIN_TAB * const tab= &(join_tab[0]);
  if (tab->type != JT_FT)
    return;    // Access is not using FTS result

  if ((tab->table->file->ha_table_flags() & HA_CAN_FULLTEXT_EXT) == 0)
    return;    // Optimizations requires extended FTS support by table engine

  // The MATCH expression whose result is used to access the table.
  Item_func_match* fts_result= static_cast<Item_func_match*>(tab->keyuse->val);

  /* If we are ordering on the rank of the same result as is used for access,
     and the table engine deliver result ordered by rank, we can drop ordering.
   */
  if (order != NULL
      && order->next == NULL &&
      order->direction == ORDER::ORDER_DESC &&
      fts_result->eq(*(order->item), true))
  {
    Item_func_match* fts_item=
      static_cast<Item_func_match*>(*(order->item));

    /* If we applied the LIMIT optimization @see optimize_fts_limit_query,
       check that the number of matching rows is sufficient.
       Otherwise, revert this optimization and use table scan instead.
    */
    if (min_ft_matches != HA_POS_ERROR &&
        min_ft_matches > fts_item->get_count())
    {
      // revert to table scan, do things make_join_readinfo would have done
      tab->type= JT_ALL;
      tab->read_first_record= join_init_read_record;
      tab->use_quick= QS_NONE;
      tab->ref.key= -1;

      // Reset join condition
      tab->select->cond= NULL;
      conds= NULL;

      thd->set_status_no_index_used();
      // make_join_readinfo only calls inc_status_select_scan()
      // when this is not SELECT_DESCRIBE
      DBUG_ASSERT((select_options & SELECT_DESCRIBE) == 0);
      thd->inc_status_select_scan();

      return;
    }
    else if (fts_item->ordered_result())
      order= NULL;
  }

  /* Check whether the FTS result is covering.  If only document id
     and rank is needed, there is no need to access table rows.
  */
  List_iterator<Item> it(all_fields);
  Item *item;
  // This optimization does not work with filesort nor GROUP BY
  bool covering= (!order && !group);
  bool docid_found= false;
  // Stop at the first item that cannot be served from the FTS result.
  while (covering && (item= it++))
  {
    switch (item->type()) {
    case Item::FIELD_ITEM:
    {
      // The only field allowed is the FTS document id column.
      Item_field *item_field= static_cast<Item_field*>(item);
      if (strcmp(item_field->field_name, FTS_DOC_ID_COL_NAME) == 0)
      {
        docid_found= true;
        covering= fts_result->docid_in_result();
      }
      else
        covering= false;
      break;
    }
    case Item::FUNC_ITEM:
      // The only function allowed is a MATCH equal to the access one.
      if (static_cast<Item_func*>(item)->functype() == Item_func::FT_FUNC)
      {
        Item_func_match* fts_item= static_cast<Item_func_match*>(item);
        if (fts_item->eq(fts_result, true))
          break;
      }
      // Fall-through when not an equivalent MATCH expression
    default:
      covering= false;
    }
  }

  if (covering)
  {
    if (docid_found)
    {
      // Replace the document id field item with an Item_func_docid.
      replace_item_field(FTS_DOC_ID_COL_NAME,
                         new Item_func_docid(reinterpret_cast<FT_INFO_EXT*>
                                             (fts_result->ft_handler)));
    }

    // Tell storage engine that row access is not necessary
    fts_result->table->set_keyread(true);
    fts_result->table->covering_keys.set_bit(fts_result->key);
  }
}
9451 
9452 
9453   /**
9454      Optimize FTS queries with ORDER BY/LIMIT, but no WHERE clause.
9455 
9456      If MATCH expression is not in WHERE clause, but in ORDER BY,
9457      JT_FT access will not apply. However, if we are ordering on rank and
9458      there is a limit, normally, only the top ranking rows are needed
9459      returned, and one would benefit from the optimizations associated
9460      with JT_FT acess (@see optimize_fts_query).  To get JT_FT access we
9461      will add the MATCH expression to the WHERE clause.
9462 
9463      @note This optimization will only be applied to single table
9464            queries with no existing WHERE clause.
9465      @note This transformation is not correct if number of matches
9466            is less than the number of rows requested by limit.
9467            If this turns out to be the case, the transformation will
9468            be reverted @see optimize_fts_query()
9469    */
9470 void
optimize_fts_limit_query()9471 JOIN::optimize_fts_limit_query()
9472 {
9473   /*
9474      Only do this optimization if
9475      1. It is a single table query
9476      2. There is no WHERE condition
9477      3. There is a single ORDER BY element
9478      4. Ordering is descending
9479      5. There is a LIMIT clause
9480      6. Ordering is on a MATCH expression
9481    */
9482   if (primary_tables == 1 &&                        // 1
9483       conds == NULL &&                              // 2
9484       order && order->next == NULL &&     // 3
9485       order->direction == ORDER::ORDER_DESC && // 4
9486       m_select_limit != HA_POS_ERROR)               // 5
9487   {
9488     DBUG_ASSERT(order->item);
9489     Item* item= *order->item;
9490     DBUG_ASSERT(item);
9491 
9492     if (item->type() == Item::FUNC_ITEM &&
9493         static_cast<Item_func*>(item)->functype() == Item_func::FT_FUNC)  // 6
9494     {
9495       conds= item;
9496       min_ft_matches= m_select_limit;
9497     }
9498   }
9499 }
9500 
9501 
9502 /**
9503    For {semijoin,subquery} materialization: calculates various cost
9504    information, based on a plan in join->best_positions covering the
9505    to-be-materialized query block and only this.
9506 
9507    @param join     JOIN where plan can be found
9508    @param sj_nest  sj materialization nest (NULL if subquery materialization)
9509    @param n_tables number of to-be-materialized tables
9510    @param[out] sjm where computed costs will be stored
9511 
9512    @note that this function modifies join->map2table, which has to be filled
9513    correctly later.
9514 */
calculate_materialization_costs(JOIN * join,TABLE_LIST * sj_nest,uint n_tables,Semijoin_mat_optimize * sjm)9515 static void calculate_materialization_costs(JOIN *join,
9516                                             TABLE_LIST *sj_nest,
9517                                             uint n_tables,
9518                                             Semijoin_mat_optimize *sjm)
9519 {
9520   double mat_cost;             // Estimated cost of materialization
9521   double mat_rowcount;         // Estimated row count before duplicate removal
9522   double distinct_rowcount;    // Estimated rowcount after duplicate removal
9523   List<Item> *inner_expr_list;
9524 
9525   if (sj_nest)
9526   {
9527     /*
9528       get_partial_join_cost() assumes a regular join, which is correct when
9529       we optimize a sj-materialization nest (always executed as regular
9530       join).
9531       @todo consider using join->best_rowcount instead.
9532     */
9533     get_partial_join_cost(join, n_tables,
9534                           &mat_cost, &mat_rowcount);
9535     n_tables+= join->const_tables;
9536     inner_expr_list= &sj_nest->nested_join->sj_inner_exprs;
9537   }
9538   else
9539   {
9540     mat_cost= join->best_read;
9541     mat_rowcount= join->best_rowcount;
9542     inner_expr_list= &join->select_lex->item_list;
9543   }
9544 
9545   /*
9546     Adjust output cardinality estimates. If the subquery has form
9547 
9548     ... oe IN (SELECT t1.colX, t2.colY, func(X,Y,Z) )
9549 
9550     then the number of distinct output record combinations has an
9551     upper bound of product of number of records matching the tables
9552     that are used by the SELECT clause.
9553     TODO:
9554     We can get a more precise estimate if we
9555      - use rec_per_key cardinality estimates. For simple cases like
9556      "oe IN (SELECT t.key ...)" it is trivial.
9557      - Functional dependencies between the tables in the semi-join
9558      nest (the payoff is probably less here?)
9559   */
9560   {
9561     for (uint i=0 ; i < n_tables ; i++)
9562     {
9563       JOIN_TAB * const tab= join->best_positions[i].table;
9564       join->map2table[tab->table->tablenr]= tab;
9565     }
9566     List_iterator<Item> it(*inner_expr_list);
9567     Item *item;
9568     table_map map= 0;
9569     while ((item= it++))
9570       map|= item->used_tables();
9571     map&= ~PSEUDO_TABLE_BITS;
9572     Table_map_iterator tm_it(map);
9573     int tableno;
9574     double rows= 1.0;
9575     while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
9576       rows*= join->map2table[tableno]->table->quick_condition_rows;
9577     distinct_rowcount= min(mat_rowcount, rows);
9578   }
9579   /*
9580     Calculate temporary table parameters and usage costs
9581   */
9582   const uint rowlen= get_tmp_table_rec_length(*inner_expr_list);
9583 
9584   double row_cost;    // The cost to write or lookup a row in temp. table
9585   double create_cost; // The cost to create a temporary table
9586   if (rowlen * distinct_rowcount <
9587       join->thd->variables.max_heap_table_size)
9588   {
9589     row_cost=    HEAP_TEMPTABLE_ROW_COST;
9590     create_cost= HEAP_TEMPTABLE_CREATE_COST;
9591   }
9592   else
9593   {
9594     row_cost=    DISK_TEMPTABLE_ROW_COST;
9595     create_cost= DISK_TEMPTABLE_CREATE_COST;
9596   }
9597 
9598   /*
9599     Let materialization cost include the cost to create the temporary
9600     table and write the rows into it:
9601   */
9602   mat_cost+= create_cost + (mat_rowcount * row_cost);
9603   sjm->materialization_cost.reset();
9604   sjm->materialization_cost
9605     .add_io(mat_cost);
9606 
9607   sjm->expected_rowcount= distinct_rowcount;
9608 
9609   /*
9610     Set the cost to do a full scan of the temptable (will need this to
9611     consider doing sjm-scan):
9612   */
9613   sjm->scan_cost.reset();
9614   if (distinct_rowcount > 0.0)
9615     sjm->scan_cost.add_io(distinct_rowcount * row_cost);
9616 
9617   sjm->lookup_cost.reset();
9618   sjm->lookup_cost.add_io(row_cost);
9619 }
9620 
9621 
9622 /**
9623    Decides between EXISTS and materialization; performs last steps to set up
9624    the chosen strategy.
9625    @returns 'false' if no error
9626 
9627    @note If UNION this is called on each contained JOIN.
9628 
9629  */
decide_subquery_strategy()9630 bool JOIN::decide_subquery_strategy()
9631 {
9632   DBUG_ASSERT(unit->item);
9633 
9634   switch (unit->item->substype())
9635   {
9636   case Item_subselect::IN_SUBS:
9637   case Item_subselect::ALL_SUBS:
9638   case Item_subselect::ANY_SUBS:
9639     // All of those are children of Item_in_subselect and may use EXISTS
9640     break;
9641   default:
9642     return false;
9643   }
9644 
9645   Item_in_subselect * const in_pred=
9646     static_cast<Item_in_subselect *>(unit->item);
9647 
9648   Item_exists_subselect::enum_exec_method chosen_method= in_pred->exec_method;
9649   // Materialization does not allow UNION so this can't happen:
9650   DBUG_ASSERT(chosen_method != Item_exists_subselect::EXEC_MATERIALIZATION);
9651 
9652   if ((chosen_method == Item_exists_subselect::EXEC_EXISTS_OR_MAT) &&
9653       compare_costs_of_subquery_strategies(&chosen_method))
9654     return true;
9655 
9656   switch (chosen_method)
9657   {
9658   case Item_exists_subselect::EXEC_EXISTS:
9659     return in_pred->finalize_exists_transform(select_lex);
9660   case Item_exists_subselect::EXEC_MATERIALIZATION:
9661     return in_pred->finalize_materialization_transform(this);
9662   default:
9663     DBUG_ASSERT(false);
9664     return true;
9665   }
9666 }
9667 
9668 
9669 /**
9670    Tells what is the cheapest between IN->EXISTS and subquery materialization,
9671    in terms of cost, for the subquery's JOIN.
9672    Input:
9673    - join->{best_positions,best_read,best_rowcount} must contain the
9674    execution plan of EXISTS (where 'join' is the subquery's JOIN)
9675    - join2->{best_positions,best_read,best_rowcount} must be correctly set
9676    (where 'join2' is the parent join, the grandparent join, etc).
9677    Output:
9678    join->{best_positions,best_read,best_rowcount} contain the cheapest
9679    execution plan (where 'join' is the subquery's JOIN).
9680 
9681    This plan choice has to happen before calling functions which set up
9682    execution structures, like JOIN::get_best_combination() or
9683    JOIN::set_access_methods().
9684 
9685    @param[out] method  chosen method (EXISTS or materialization) will be put
9686                        here.
9687    @returns false if success
9688 */
compare_costs_of_subquery_strategies(Item_exists_subselect::enum_exec_method * method)9689 bool JOIN::compare_costs_of_subquery_strategies(
9690                Item_exists_subselect::enum_exec_method *method)
9691 {
9692   *method= Item_exists_subselect::EXEC_EXISTS;
9693 
9694   if (!thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MATERIALIZATION))
9695     return false;
9696 
9697   const JOIN *parent_join= unit->outer_select()->join;
9698   if (!parent_join || !parent_join->child_subquery_can_materialize)
9699     return false;
9700 
9701   Item_in_subselect * const in_pred=
9702     static_cast<Item_in_subselect *>(unit->item);
9703 
9704   /*
9705     Testing subquery_allows_etc() at each optimization is necessary as each
9706     execution of a prepared statement may use a different type of parameter.
9707   */
9708   if (!subquery_allows_materialization(in_pred, thd, select_lex,
9709                                        select_lex->outer_select()))
9710     return false;
9711 
9712   Opt_trace_context * const trace= &thd->opt_trace;
9713   Opt_trace_object trace_wrapper(trace);
9714   Opt_trace_object
9715     trace_subqmat(trace, "execution_plan_for_potential_materialization");
9716   const double saved_best_read= best_read;
9717   const ha_rows saved_best_rowcount= best_rowcount;
9718   POSITION * const saved_best_pos= best_positions;
9719 
9720   if (in_pred->in2exists_added_to_where())
9721   {
9722     Opt_trace_array trace_subqmat_steps(trace, "steps");
9723 
9724     // Up to one extra slot per semi-join nest is needed (if materialized)
9725     const uint sj_nests= select_lex->sj_nests.elements;
9726 
9727     if (!(best_positions= new (thd->mem_root) POSITION[tables + sj_nests + 1]))
9728       return true;
9729 
9730     // Compute plans which do not use outer references
9731 
9732     DBUG_ASSERT(allow_outer_refs);
9733     allow_outer_refs= false;
9734 
9735     if (optimize_semijoin_nests_for_materialization(this))
9736       return true;
9737 
9738     if (Optimize_table_order(thd, this, NULL).choose_table_order())
9739       return true;
9740   }
9741   else
9742   {
9743     /*
9744       If IN->EXISTS didn't add any condition to WHERE (only to HAVING, which
9745       can happen if subquery has aggregates) then the plan for materialization
9746       will be the same as for EXISTS - don't compute it again.
9747     */
9748     trace_subqmat.add("surely_same_plan_as_EXISTS", true).
9749       add_alnum("cause", "EXISTS_did_not_change_WHERE");
9750   }
9751 
9752   Semijoin_mat_optimize sjm;
9753   calculate_materialization_costs(this, NULL, primary_tables, &sjm);
9754 
9755   /*
9756     The number of evaluations of the subquery influences costs, we need to
9757     compute it.
9758   */
9759   Opt_trace_object trace_subq_mat_decision(trace, "subq_mat_decision");
9760   Opt_trace_array trace_parents(trace, "parent_fanouts");
9761   const Item_subselect *subs= in_pred;
9762   double subq_executions= 1.0;
9763   for(;;)
9764   {
9765     Opt_trace_object trace_parent(trace);
9766     trace_parent.add_select_number(parent_join->select_lex->select_number);
9767     double parent_fanout;
9768     if (// safety, not sure needed
9769         parent_join->plan_is_const() ||
9770         // if subq is in condition on constant table:
9771         !parent_join->child_subquery_can_materialize)
9772     {
9773       parent_fanout= 1.0;
9774       trace_parent.add("subq_attached_to_const_table", true);
9775     }
9776     else
9777     {
9778       if (subs->in_cond_of_tab != INT_MIN)
9779       {
9780         /*
9781           Subquery is attached to a certain 'pos', pos[-1].prefix_record_count
9782           is the number of times we'll start a loop accessing 'pos'; each such
9783           loop will read pos->records_read records of 'pos', so subquery will
9784           be evaluated pos[-1].prefix_record_count * pos->records_read times.
9785           Exceptions:
9786           - if 'pos' is first, use 1 instead of pos[-1].prefix_record_count
9787           - if 'pos' is first of a sjerialization-mat nest, same.
9788 
9789           If in a sj-materialization nest, pos->records_read and
9790           pos[-1].prefix_record_count are of the "nest materialization" plan
9791           (copied back in fix_semijoin_strategies()), which is
9792           appropriate as it corresponds to evaluations of our subquery.
9793         */
9794         const uint idx= subs->in_cond_of_tab;
9795         DBUG_ASSERT((int)idx >= 0 && idx < parent_join->tables);
9796         trace_parent.add("subq_attached_to_table", true);
9797         trace_parent.add_utf8_table(parent_join->join_tab[idx].table);
9798         parent_fanout= parent_join->join_tab[idx].position->records_read;
9799         if ((idx > parent_join->const_tables) &&
9800             !sj_is_materialize_strategy(parent_join
9801                                         ->join_tab[idx].position->sj_strategy))
9802           parent_fanout*=
9803             parent_join->join_tab[idx - 1].position->prefix_record_count;
9804       }
9805       else
9806       {
9807         /*
9808           Subquery is SELECT list, GROUP BY, ORDER BY, HAVING: it is evaluated
9809           at the end of the parent join's execution.
9810           It can be evaluated once per row-before-grouping:
9811           SELECT SUM(t1.col IN (subq)) FROM t1 GROUP BY expr;
9812           or once per row-after-grouping:
9813           SELECT SUM(t1.col) AS s FROM t1 GROUP BY expr HAVING s IN (subq),
9814           SELECT SUM(t1.col) IN (subq) FROM t1 GROUP BY expr
9815           It's hard to tell. We simply assume 'once per
9816           row-before-grouping'.
9817 
9818           Another approximation:
9819           SELECT ... HAVING x IN (subq) LIMIT 1
9820           best_rowcount=1 due to LIMIT, though HAVING (and thus the subquery)
9821           may be evaluated many times before HAVING becomes true and the limit
9822           is reached.
9823         */
9824         trace_parent.add("subq_attached_to_join_result", true);
9825         parent_fanout= parent_join->best_rowcount;
9826       }
9827     }
9828     subq_executions*= parent_fanout;
9829     trace_parent.add("fanout", parent_fanout);
9830     const bool cacheable= parent_join->select_lex->is_cacheable();
9831     trace_parent.add("cacheable", cacheable);
9832     if (cacheable)
9833     {
9834       // Parent executed only once
9835       break;
9836     }
9837     /*
9838       Parent query is executed once per outer row => go up to find number of
9839       outer rows. Example:
9840       SELECT ... IN(subq-with-in2exists WHERE ... IN (subq-with-mat))
9841     */
9842     if (!(subs= parent_join->unit->item))
9843     {
9844       // derived table, materialized only once
9845       break;
9846     }
9847     parent_join= parent_join->unit->outer_select()->join;
9848     if (!parent_join)
9849     {
9850       /*
9851         May be single-table UPDATE/DELETE, has no join.
9852         @todo  we should find how many rows it plans to UPDATE/DELETE, taking
9853         inspiration in Explain_table::explain_rows_and_filtered().
9854         This is not a priority as it applies only to
9855         UPDATE - child(non-mat-subq) - grandchild(may-be-mat-subq).
9856         And it will autosolve the day UPDATE gets a JOIN.
9857       */
9858       break;
9859     }
9860   }  // for(;;)
9861   trace_parents.end();
9862 
9863   const double cost_exists= subq_executions * saved_best_read;
9864   const double cost_mat_table= sjm.materialization_cost.total_cost();
9865   const double cost_mat= cost_mat_table + subq_executions *
9866     sjm.lookup_cost.total_cost();
9867   const bool mat_chosen=
9868     thd->optimizer_switch_flag(OPTIMIZER_SWITCH_SUBQ_MAT_COST_BASED) ?
9869     (cost_mat < cost_exists) : true;
9870   trace_subq_mat_decision
9871     .add("cost_to_create_and_fill_materialized_table",
9872          cost_mat_table)
9873     .add("cost_of_one_EXISTS", saved_best_read)
9874     .add("number_of_subquery_evaluations", subq_executions)
9875     .add("cost_of_materialization", cost_mat)
9876     .add("cost_of_EXISTS", cost_exists)
9877     .add("chosen", mat_chosen);
9878   if (mat_chosen)
9879     *method= Item_exists_subselect::EXEC_MATERIALIZATION;
9880   else
9881   {
9882     best_read= saved_best_read;
9883     best_rowcount= saved_best_rowcount;
9884     best_positions= saved_best_pos;
9885     /*
9886       Don't restore JOIN::positions or best_ref, they're not used
9887       afterwards. best_positions is (like: by get_sj_strategy()).
9888     */
9889   }
9890   return false;
9891 }
9892 
9893 
9894 /**
9895   Refine the best_rowcount estimation based on what happens after tables
9896   have been joined: LIMIT and type of result sink.
9897  */
refine_best_rowcount()9898 void JOIN::refine_best_rowcount()
9899 {
9900   // If plan is const, 0 or 1 rows should be returned
9901   DBUG_ASSERT(!plan_is_const() || best_rowcount <= 1);
9902 
9903   if (plan_is_const())
9904     return;
9905 
9906   /*
9907     If a derived table, or a member of a UNION which itself forms a derived
9908     table:
9909     setting estimate to 0 or 1 row would mark the derived table as const.
9910     The row count is bumped to the nearest higher value, so that the
9911     query block will not be evaluated during optimization.
9912   */
9913   if (best_rowcount <= 1 &&
9914       select_lex->master_unit()->first_select()->linkage ==
9915       DERIVED_TABLE_TYPE)
9916     best_rowcount= 2;
9917 
9918   /*
9919     There will be no more rows than defined in the LIMIT clause. Use it
9920     as an estimate. If LIMIT 1 is specified, the query block will be
9921     considered "const", with actual row count 0 or 1.
9922   */
9923   set_if_smaller(best_rowcount, unit->select_limit_cnt);
9924 }
9925 
9926 /**
9927   @} (end of group Query_Optimizer)
9928 */
9929