1 /* Copyright (c) 2000, 2016, Oracle and/or its affiliates.
2    Copyright (c) 2009, 2021, MariaDB Corporation.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; version 2 of the License.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA */
16 
17 /**
18   @file
19 
20   @brief
21   mysql_select and join optimization
22 
23 
24   @defgroup Query_Optimizer  Query Optimizer
25   @{
26 */
27 
28 #ifdef USE_PRAGMA_IMPLEMENTATION
29 #pragma implementation				// gcc: Class implementation
30 #endif
31 
32 #include "mariadb.h"
33 #include "sql_priv.h"
34 #include "unireg.h"
35 #include "sql_select.h"
36 #include "sql_cache.h"                          // query_cache_*
37 #include "sql_table.h"                          // primary_key_name
38 #include "probes_mysql.h"
39 #include "key.h"                 // key_copy, key_cmp, key_cmp_if_same
40 #include "lock.h"                // mysql_unlock_some_tables,
41                                  // mysql_unlock_read_tables
42 #include "sql_show.h"            // append_identifier
43 #include "sql_base.h"            // setup_wild, setup_fields, fill_record
44 #include "sql_parse.h"                          // check_stack_overrun
45 #include "sql_partition.h"       // make_used_partitions_str
46 #include "sql_acl.h"             // *_ACL
47 #include "sql_test.h"            // print_where, print_keyuse_array,
48                                  // print_sjm, print_plan, TEST_join
49 #include "records.h"             // init_read_record, end_read_record
50 #include "filesort.h"            // filesort_free_buffers
51 #include "sql_union.h"           // mysql_union
52 #include "opt_subselect.h"
53 #include "sql_derived.h"
54 #include "sql_statistics.h"
55 #include "sql_cte.h"
56 #include "sql_window.h"
57 #include "tztime.h"
58 
59 #include "debug_sync.h"          // DEBUG_SYNC
60 #include <m_ctype.h>
61 #include <my_bit.h>
62 #include <hash.h>
63 #include <ft_global.h>
64 #include "sys_vars_shared.h"
65 #include "sp_head.h"
66 #include "sp_rcontext.h"
67 
68 /*
69   A key part number that means we're using a fulltext scan.
70 
71   In order not to confuse it with regular equalities, we need to pick
72   a number that's greater than MAX_REF_PARTS.
73 
74   Hash Join code stores field->field_index in KEYUSE::keypart, so the
75   number needs to be bigger than MAX_FIELDS, also.
76 
77   CAUTION: sql_test.cc has its own definition of FT_KEYPART.
78 */
79 #define FT_KEYPART   (MAX_FIELDS+10)
80 
/*
  Printable names of the table access methods.
  NOTE(review): presumably indexed by the join_type enum declared elsewhere;
  confirm that the order matches before adding or reordering entries.
*/
const char *join_type_str[]={ "UNKNOWN","system","const","eq_ref","ref",
			      "MAYBE_REF","ALL","range","index","fulltext",
			      "ref_or_null","unique_subquery","index_subquery",
                              "index_merge", "hash_ALL", "hash_range",
                              "hash_index", "hash_index_merge" };

/*
  Constant key names "group_key" and "distinct_key".
  NOTE(review): presumably used to name keys of internal temporary tables;
  the users are outside this part of the file - confirm.
*/
LEX_CSTRING group_key= {STRING_WITH_LEN("group_key")};
LEX_CSTRING distinct_key= {STRING_WITH_LEN("distinct_key")};
89 
90 struct st_sargable_param;
91 
92 static bool make_join_statistics(JOIN *join, List<TABLE_LIST> &leaves,
93                                  DYNAMIC_ARRAY *keyuse);
94 static bool update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,
95                                 JOIN_TAB *join_tab,
96                                 uint tables, COND *conds,
97                                 table_map table_map, SELECT_LEX *select_lex,
98                                 SARGABLE_PARAM **sargables);
99 static int sort_keyuse(KEYUSE *a,KEYUSE *b);
100 static bool are_tables_local(JOIN_TAB *jtab, table_map used_tables);
101 static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
102 			       bool allow_full_scan, table_map used_tables);
103 static void optimize_straight_join(JOIN *join, table_map join_tables);
104 static bool greedy_search(JOIN *join, table_map remaining_tables,
105                           uint depth, uint prune_level,
106                           uint use_cond_selectivity);
107 static bool best_extension_by_limited_search(JOIN *join,
108                                              table_map remaining_tables,
109                                              uint idx, double record_count,
110                                              double read_time, uint depth,
111                                              uint prune_level,
112                                              uint use_cond_selectivity);
113 static uint determine_search_depth(JOIN* join);
114 C_MODE_START
115 static int join_tab_cmp(const void *dummy, const void* ptr1, const void* ptr2);
116 static int join_tab_cmp_straight(const void *dummy, const void* ptr1, const void* ptr2);
117 static int join_tab_cmp_embedded_first(const void *emb, const void* ptr1, const void *ptr2);
118 C_MODE_END
119 static uint cache_record_length(JOIN *join,uint index);
120 static store_key *get_store_key(THD *thd,
121 				KEYUSE *keyuse, table_map used_tables,
122 				KEY_PART_INFO *key_part, uchar *key_buff,
123 				uint maybe_null);
124 static bool make_outerjoin_info(JOIN *join);
125 static Item*
126 make_cond_after_sjm(THD *thd, Item *root_cond, Item *cond, table_map tables,
127                     table_map sjm_tables, bool inside_or_clause);
128 static bool make_join_select(JOIN *join,SQL_SELECT *select,COND *item);
129 static void revise_cache_usage(JOIN_TAB *join_tab);
130 static bool make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after);
131 static bool only_eq_ref_tables(JOIN *join, ORDER *order, table_map tables);
132 static void update_depend_map(JOIN *join);
133 static void update_depend_map_for_order(JOIN *join, ORDER *order);
134 static ORDER *remove_const(JOIN *join,ORDER *first_order,COND *cond,
135 			   bool change_list, bool *simple_order);
136 static int return_zero_rows(JOIN *join, select_result *res,
137                             List<TABLE_LIST> &tables,
138                             List<Item> &fields, bool send_row,
139                             ulonglong select_options, const char *info,
140                             Item *having, List<Item> &all_fields);
141 static COND *build_equal_items(JOIN *join, COND *cond,
142                                COND_EQUAL *inherited,
143                                List<TABLE_LIST> *join_list,
144                                bool ignore_on_conds,
145                                COND_EQUAL **cond_equal_ref,
146                                bool link_equal_fields= FALSE);
147 static COND* substitute_for_best_equal_field(THD *thd, JOIN_TAB *context_tab,
148                                              COND *cond,
149                                              COND_EQUAL *cond_equal,
150                                              void *table_join_idx);
151 static COND *simplify_joins(JOIN *join, List<TABLE_LIST> *join_list,
152                             COND *conds, bool top, bool in_sj);
153 static bool check_interleaving_with_nj(JOIN_TAB *next);
154 static void restore_prev_nj_state(JOIN_TAB *last);
155 static uint reset_nj_counters(JOIN *join, List<TABLE_LIST> *join_list);
156 static uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list,
157                                           uint first_unused);
158 
159 static COND *optimize_cond(JOIN *join, COND *conds,
160                            List<TABLE_LIST> *join_list,
161                            bool ignore_on_conds,
162                            Item::cond_result *cond_value,
163                            COND_EQUAL **cond_equal,
164                            int flags= 0);
165 bool const_expression_in_where(COND *conds,Item *item, Item **comp_item);
166 static int do_select(JOIN *join, Procedure *procedure);
167 
168 static enum_nested_loop_state evaluate_join_record(JOIN *, JOIN_TAB *, int);
169 static enum_nested_loop_state
170 evaluate_null_complemented_join_record(JOIN *join, JOIN_TAB *join_tab);
171 static enum_nested_loop_state
172 end_send(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
173 static enum_nested_loop_state
174 end_write(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
175 static enum_nested_loop_state
176 end_update(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
177 static enum_nested_loop_state
178 end_unique_update(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
179 
180 static int join_read_const_table(THD *thd, JOIN_TAB *tab, POSITION *pos);
181 static int join_read_system(JOIN_TAB *tab);
182 static int join_read_const(JOIN_TAB *tab);
183 static int join_read_key(JOIN_TAB *tab);
184 static void join_read_key_unlock_row(st_join_table *tab);
185 static int join_read_always_key(JOIN_TAB *tab);
186 static int join_read_last_key(JOIN_TAB *tab);
187 static int join_no_more_records(READ_RECORD *info);
188 static int join_read_next(READ_RECORD *info);
189 static int join_init_quick_read_record(JOIN_TAB *tab);
190 static int test_if_quick_select(JOIN_TAB *tab);
191 static bool test_if_use_dynamic_range_scan(JOIN_TAB *join_tab);
192 static int join_read_first(JOIN_TAB *tab);
193 static int join_read_next(READ_RECORD *info);
194 static int join_read_next_same(READ_RECORD *info);
195 static int join_read_last(JOIN_TAB *tab);
196 static int join_read_prev_same(READ_RECORD *info);
197 static int join_read_prev(READ_RECORD *info);
198 static int join_ft_read_first(JOIN_TAB *tab);
199 static int join_ft_read_next(READ_RECORD *info);
200 int join_read_always_key_or_null(JOIN_TAB *tab);
201 int join_read_next_same_or_null(READ_RECORD *info);
202 static COND *make_cond_for_table(THD *thd, Item *cond,table_map table,
203                                  table_map used_table,
204                                  int join_tab_idx_arg,
205                                  bool exclude_expensive_cond,
206                                  bool retain_ref_cond);
207 static COND *make_cond_for_table_from_pred(THD *thd, Item *root_cond,
208                                            Item *cond,
209                                            table_map tables,
210                                            table_map used_table,
211                                            int join_tab_idx_arg,
212                                            bool exclude_expensive_cond,
213                                            bool retain_ref_cond,
214                                            bool is_top_and_level);
215 
216 static Item* part_of_refkey(TABLE *form,Field *field);
217 uint find_shortest_key(TABLE *table, const key_map *usable_keys);
218 static bool test_if_cheaper_ordering(const JOIN_TAB *tab,
219                                      ORDER *order, TABLE *table,
220                                      key_map usable_keys, int key,
221                                      ha_rows select_limit,
222                                      int *new_key, int *new_key_direction,
223                                      ha_rows *new_select_limit,
224                                      uint *new_used_key_parts= NULL,
225                                      uint *saved_best_key_parts= NULL);
226 static int test_if_order_by_key(JOIN *join,
227                                 ORDER *order, TABLE *table, uint idx,
228 				uint *used_key_parts= NULL);
229 static bool test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,
230 				    ha_rows select_limit, bool no_changes,
231                                     const key_map *map);
232 static bool list_contains_unique_index(TABLE *table,
233                           bool (*find_func) (Field *, void *), void *data);
234 static bool find_field_in_item_list (Field *field, void *data);
235 static bool find_field_in_order_list (Field *field, void *data);
236 int create_sort_index(THD *thd, JOIN *join, JOIN_TAB *tab, Filesort *fsort);
237 static int remove_dup_with_compare(THD *thd, TABLE *entry, Field **field,
238 				   Item *having);
239 static int remove_dup_with_hash_index(THD *thd,TABLE *table,
240 				      uint field_count, Field **first_field,
241 				      ulong key_length,Item *having);
242 static bool cmp_buffer_with_ref(THD *thd, TABLE *table, TABLE_REF *tab_ref);
243 static bool setup_new_fields(THD *thd, List<Item> &fields,
244 			     List<Item> &all_fields, ORDER *new_order);
245 static ORDER *create_distinct_group(THD *thd, Ref_ptr_array ref_pointer_array,
246                                     ORDER *order, List<Item> &fields,
247                                     List<Item> &all_fields,
248 				    bool *all_order_by_fields_used);
249 static bool test_if_subpart(ORDER *a,ORDER *b);
250 static TABLE *get_sort_by_table(ORDER *a,ORDER *b,List<TABLE_LIST> &tables,
251                                 table_map const_tables);
252 static void calc_group_buffer(JOIN *join,ORDER *group);
253 static bool make_group_fields(JOIN *main_join, JOIN *curr_join);
254 static bool alloc_group_fields(JOIN *join,ORDER *group);
// Create list for using with temporary table
256 static bool change_to_use_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array,
257 				     List<Item> &new_list1,
258 				     List<Item> &new_list2,
259 				     uint elements, List<Item> &items);
// Create list for using with temporary table
261 static bool change_refs_to_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array,
262 				      List<Item> &new_list1,
263 				      List<Item> &new_list2,
264 				      uint elements, List<Item> &items);
265 static void init_tmptable_sum_functions(Item_sum **func);
266 static void update_tmptable_sum_func(Item_sum **func,TABLE *tmp_table);
267 static void copy_sum_funcs(Item_sum **func_ptr, Item_sum **end);
268 static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab);
269 static bool setup_sum_funcs(THD *thd, Item_sum **func_ptr);
270 static bool prepare_sum_aggregators(Item_sum **func_ptr, bool need_distinct);
271 static bool init_sum_functions(Item_sum **func, Item_sum **end);
272 static bool update_sum_func(Item_sum **func);
273 static void select_describe(JOIN *join, bool need_tmp_table,bool need_order,
274 			    bool distinct, const char *message=NullS);
275 static void add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab);
276 static uint make_join_orderinfo(JOIN *join);
277 static bool generate_derived_keys(DYNAMIC_ARRAY *keyuse_array);
278 
279 Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field,
280                             bool *inherited_fl);
281 JOIN_TAB *first_depth_first_tab(JOIN* join);
282 JOIN_TAB *next_depth_first_tab(JOIN* join, JOIN_TAB* tab);
283 
284 static JOIN_TAB *next_breadth_first_tab(JOIN_TAB *first_top_tab,
285                                         uint n_top_tabs_count, JOIN_TAB *tab);
286 static bool find_order_in_list(THD *, Ref_ptr_array, TABLE_LIST *, ORDER *,
287                                List<Item> &, List<Item> &, bool, bool, bool);
288 
289 static double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
290                                      table_map rem_tables);
291 void set_postjoin_aggr_write_func(JOIN_TAB *tab);
292 
293 static Item **get_sargable_cond(JOIN *join, TABLE *table);
294 
295 bool is_eq_cond_injected_for_split_opt(Item_func_eq *eq_item);
296 
297 #ifndef DBUG_OFF
298 
299 /*
300   SHOW EXPLAIN testing: wait for, and serve n_calls APC requests.
301 */
dbug_serve_apcs(THD * thd,int n_calls)302 void dbug_serve_apcs(THD *thd, int n_calls)
303 {
304   const char *save_proc_info= thd->proc_info;
305 
306   /* Busy-wait for n_calls APC requests to arrive and be processed */
307   int n_apcs= thd->apc_target.n_calls_processed + n_calls;
308   while (thd->apc_target.n_calls_processed < n_apcs)
309   {
310     /* This is so that mysqltest knows we're ready to serve requests: */
311     thd_proc_info(thd, "show_explain_trap");
312     my_sleep(30000);
313     thd_proc_info(thd, save_proc_info);
314     if (unlikely(thd->check_killed(1)))
315       break;
316   }
317 }
318 
319 
320 /*
321   Debugging: check if @name=value, comparing as integer
322 
323   Intended usage:
324 
325   DBUG_EXECUTE_IF("show_explain_probe_2",
326                      if (dbug_user_var_equals_int(thd, "select_id", select_id))
327                         dbug_serve_apcs(thd, 1);
328                  );
329 
330 */
331 
dbug_user_var_equals_int(THD * thd,const char * name,int value)332 bool dbug_user_var_equals_int(THD *thd, const char *name, int value)
333 {
334   user_var_entry *var;
335   LEX_CSTRING varname= { name, strlen(name)};
336   if ((var= get_variable(&thd->user_vars, &varname, FALSE)))
337   {
338     bool null_value;
339     longlong var_value= var->val_int(&null_value);
340     if (!null_value && var_value == value)
341       return TRUE;
342   }
343   return FALSE;
344 }
345 #endif
346 
347 
348 /**
349   This handles SELECT with and without UNION.
350 */
351 
handle_select(THD * thd,LEX * lex,select_result * result,ulong setup_tables_done_option)352 bool handle_select(THD *thd, LEX *lex, select_result *result,
353                    ulong setup_tables_done_option)
354 {
355   bool res;
356   SELECT_LEX *select_lex = &lex->select_lex;
357   DBUG_ENTER("handle_select");
358   MYSQL_SELECT_START(thd->query());
359 
360   if (select_lex->master_unit()->is_unit_op() ||
361       select_lex->master_unit()->fake_select_lex)
362     res= mysql_union(thd, lex, result, &lex->unit, setup_tables_done_option);
363   else
364   {
365     SELECT_LEX_UNIT *unit= &lex->unit;
366     unit->set_limit(unit->global_parameters());
367     /*
368       'options' of mysql_select will be set in JOIN, as far as JOIN for
369       every PS/SP execution new, we will not need reset this flag if
370       setup_tables_done_option changed for next rexecution
371     */
372     res= mysql_select(thd,
373 		      select_lex->table_list.first,
374 		      select_lex->with_wild, select_lex->item_list,
375 		      select_lex->where,
376 		      select_lex->order_list.elements +
377 		      select_lex->group_list.elements,
378 		      select_lex->order_list.first,
379 		      select_lex->group_list.first,
380 		      select_lex->having,
381 		      lex->proc_list.first,
382 		      select_lex->options | thd->variables.option_bits |
383                       setup_tables_done_option,
384 		      result, unit, select_lex);
385   }
386   DBUG_PRINT("info",("res: %d  report_error: %d", res,
387 		     thd->is_error()));
388   res|= thd->is_error();
389   if (unlikely(res))
390     result->abort_result_set();
391   if (unlikely(thd->killed == ABORT_QUERY && !thd->no_errors))
392   {
393     /*
394       If LIMIT ROWS EXAMINED interrupted query execution, issue a warning,
395       continue with normal processing and produce an incomplete query result.
396     */
397     bool saved_abort_on_warning= thd->abort_on_warning;
398     thd->abort_on_warning= false;
399     push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
400                         ER_QUERY_EXCEEDED_ROWS_EXAMINED_LIMIT,
401                         ER_THD(thd, ER_QUERY_EXCEEDED_ROWS_EXAMINED_LIMIT),
402                         thd->accessed_rows_and_keys,
403                         thd->lex->limit_rows_examined->val_uint());
404     thd->abort_on_warning= saved_abort_on_warning;
405     thd->reset_killed();
406   }
407   /* Disable LIMIT ROWS EXAMINED after query execution. */
408   thd->lex->limit_rows_examined_cnt= ULONGLONG_MAX;
409 
410   MYSQL_SELECT_DONE((int) res, (ulong) thd->limit_found_rows);
411   DBUG_RETURN(res);
412 }
413 
414 
415 /**
416   Fix fields referenced from inner selects.
417 
418   @param thd               Thread handle
419   @param all_fields        List of all fields used in select
420   @param select            Current select
421   @param ref_pointer_array Array of references to Items used in current select
422   @param group_list        GROUP BY list (is NULL by default)
423 
424   @details
425     The function serves 3 purposes
426 
427     - adds fields referenced from inner query blocks to the current select list
428 
429     - Decides which class to use to reference the items (Item_ref or
430       Item_direct_ref)
431 
432     - fixes references (Item_ref objects) to these fields.
433 
434     If a field isn't already on the select list and the ref_pointer_array
435     is provided then it is added to the all_fields list and the pointer to
436     it is saved in the ref_pointer_array.
437 
438     The class to access the outer field is determined by the following rules:
439 
440     -#. If the outer field isn't used under an aggregate function then the
441         Item_ref class should be used.
442 
443     -#. If the outer field is used under an aggregate function and this
444         function is, in turn, aggregated in the query block where the outer
445         field was resolved or some query nested therein, then the
446         Item_direct_ref class should be used. Also it should be used if we are
447         grouping by a subquery that references this outer field.
448 
449     The resolution is done here and not at the fix_fields() stage as
450     it can be done only after aggregate functions are fixed and pulled up to
451     selects where they are to be aggregated.
452 
453     When the class is chosen it substitutes the original field in the
454     Item_outer_ref object.
455 
456     After this we proceed with fixing references (Item_outer_ref objects) to
457     this field from inner subqueries.
458 
459   @return Status
460   @retval true An error occurred.
461   @retval false OK.
462  */
463 
464 bool
fix_inner_refs(THD * thd,List<Item> & all_fields,SELECT_LEX * select,Ref_ptr_array ref_pointer_array)465 fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select,
466                Ref_ptr_array ref_pointer_array)
467 {
468   Item_outer_ref *ref;
469 
470   /*
471     Mark the references from  the inner_refs_list that are occurred in
472     the group by expressions. Those references will contain direct
473     references to the referred fields. The markers are set in
474     the found_in_group_by field of the references from the list.
475   */
476   List_iterator_fast <Item_outer_ref> ref_it(select->inner_refs_list);
477   for (ORDER *group= select->join->group_list; group;  group= group->next)
478   {
479     (*group->item)->walk(&Item::check_inner_refs_processor, TRUE, &ref_it);
480   }
481 
482   while ((ref= ref_it++))
483   {
484     bool direct_ref= false;
485     Item *item= ref->outer_ref;
486     Item **item_ref= ref->ref;
487     Item_ref *new_ref;
488     /*
489       TODO: this field item already might be present in the select list.
490       In this case instead of adding new field item we could use an
491       existing one. The change will lead to less operations for copying fields,
492       smaller temporary tables and less data passed through filesort.
493     */
494     if (!ref_pointer_array.is_null() && !ref->found_in_select_list)
495     {
496       int el= all_fields.elements;
497       ref_pointer_array[el]= item;
498       /* Add the field item to the select list of the current select. */
499       all_fields.push_front(item, thd->mem_root);
500       /*
501         If it's needed reset each Item_ref item that refers this field with
502         a new reference taken from ref_pointer_array.
503       */
504       item_ref= &ref_pointer_array[el];
505     }
506 
507     if (ref->in_sum_func)
508     {
509       Item_sum *sum_func;
510       if (ref->in_sum_func->nest_level > select->nest_level)
511         direct_ref= TRUE;
512       else
513       {
514         for (sum_func= ref->in_sum_func; sum_func &&
515              sum_func->aggr_level >= select->nest_level;
516              sum_func= sum_func->in_sum_func)
517         {
518           if (sum_func->aggr_level == select->nest_level)
519           {
520             direct_ref= TRUE;
521             break;
522           }
523         }
524       }
525     }
526     else if (ref->found_in_group_by)
527       direct_ref= TRUE;
528 
529     new_ref= direct_ref ?
530               new (thd->mem_root) Item_direct_ref(thd, ref->context, item_ref, ref->table_name,
531                           &ref->field_name, ref->alias_name_used) :
532               new (thd->mem_root) Item_ref(thd, ref->context, item_ref, ref->table_name,
533                           &ref->field_name, ref->alias_name_used);
534     if (!new_ref)
535       return TRUE;
536     ref->outer_ref= new_ref;
537     ref->ref= &ref->outer_ref;
538 
539     if (ref->fix_fields_if_needed(thd, 0))
540       return TRUE;
541     thd->lex->used_tables|= item->used_tables();
542     thd->lex->current_select->select_list_tables|= item->used_tables();
543   }
544   return false;
545 }
546 
547 /**
548    The following clauses are redundant for subqueries:
549 
550    DISTINCT
551    GROUP BY   if there are no aggregate functions and no HAVING
552               clause
553 
554    Because redundant clauses are removed both from JOIN and
555    select_lex, the removal is permanent. Thus, it only makes sense to
556    call this function for normal queries and on first execution of
557    SP/PS
558 
559    @param subq_select_lex   select_lex that is part of a subquery
560                             predicate. This object and the associated
561                             join is modified.
562 */
563 
564 static
remove_redundant_subquery_clauses(st_select_lex * subq_select_lex)565 void remove_redundant_subquery_clauses(st_select_lex *subq_select_lex)
566 {
567   DBUG_ENTER("remove_redundant_subquery_clauses");
568   Item_subselect *subq_predicate= subq_select_lex->master_unit()->item;
569   /*
570     The removal should happen for IN, ALL, ANY and EXISTS subqueries,
571     which means all but single row subqueries. Example single row
572     subqueries:
573        a) SELECT * FROM t1 WHERE t1.a = (<single row subquery>)
574        b) SELECT a, (<single row subquery) FROM t1
575    */
576   if (subq_predicate->substype() == Item_subselect::SINGLEROW_SUBS)
577     DBUG_VOID_RETURN;
578 
579   /* A subquery that is not single row should be one of IN/ALL/ANY/EXISTS. */
580   DBUG_ASSERT (subq_predicate->substype() == Item_subselect::EXISTS_SUBS ||
581                subq_predicate->is_in_predicate());
582 
583   if (subq_select_lex->options & SELECT_DISTINCT)
584   {
585     subq_select_lex->join->select_distinct= false;
586     subq_select_lex->options&= ~SELECT_DISTINCT;
587     DBUG_PRINT("info", ("DISTINCT removed"));
588   }
589 
590   /*
591     Remove GROUP BY if there are no aggregate functions and no HAVING
592     clause
593   */
594   if (subq_select_lex->group_list.elements &&
595       !subq_select_lex->with_sum_func && !subq_select_lex->join->having)
596   {
597     for (ORDER *ord= subq_select_lex->group_list.first; ord; ord= ord->next)
598     {
599       /*
600         Do not remove the item if it is used in select list and then referred
601         from GROUP BY clause by its name or number. Example:
602 
603           select (select ... ) as SUBQ ...  group by SUBQ
604 
605         Here SUBQ cannot be removed.
606       */
607       if (!ord->in_field_list)
608         (*ord->item)->walk(&Item::eliminate_subselect_processor, FALSE, NULL);
609     }
610     subq_select_lex->join->group_list= NULL;
611     subq_select_lex->group_list.empty();
612     DBUG_PRINT("info", ("GROUP BY removed"));
613   }
614 
615   /*
616     TODO: This would prevent processing quries with ORDER BY ... LIMIT
617     therefore we disable this optimization for now.
618     Remove GROUP BY if there are no aggregate functions and no HAVING
619     clause
620   if (subq_select_lex->group_list.elements &&
621       !subq_select_lex->with_sum_func && !subq_select_lex->join->having)
622   {
623     subq_select_lex->join->group_list= NULL;
624     subq_select_lex->group_list.empty();
625   }
626   */
627   DBUG_VOID_RETURN;
628 }
629 
630 
631 /**
632   Function to setup clauses without sum functions.
633 */
634 static inline int
setup_without_group(THD * thd,Ref_ptr_array ref_pointer_array,TABLE_LIST * tables,List<TABLE_LIST> & leaves,List<Item> & fields,List<Item> & all_fields,COND ** conds,ORDER * order,ORDER * group,List<Window_spec> & win_specs,List<Item_window_func> & win_funcs,bool * hidden_group_fields,uint * reserved)635 setup_without_group(THD *thd, Ref_ptr_array ref_pointer_array,
636                               TABLE_LIST *tables,
637                               List<TABLE_LIST> &leaves,
638                               List<Item> &fields,
639                               List<Item> &all_fields,
640                               COND **conds,
641                               ORDER *order,
642                               ORDER *group,
643                               List<Window_spec> &win_specs,
644 		              List<Item_window_func> &win_funcs,
645                               bool *hidden_group_fields,
646                               uint *reserved)
647 {
648   int res;
649   enum_parsing_place save_place;
650   st_select_lex *const select= thd->lex->current_select;
651   nesting_map save_allow_sum_func= thd->lex->allow_sum_func;
652   /*
653     Need to stave the value, so we can turn off only any new non_agg_field_used
654     additions coming from the WHERE
655   */
656   const bool saved_non_agg_field_used= select->non_agg_field_used();
657   DBUG_ENTER("setup_without_group");
658 
659   thd->lex->allow_sum_func.clear_bit(select->nest_level);
660   res= setup_conds(thd, tables, leaves, conds);
661   if (thd->lex->current_select->first_cond_optimization)
662   {
663     if (!res && *conds && ! thd->lex->current_select->merged_into)
664       (*reserved)= (*conds)->exists2in_reserved_items();
665     else
666       (*reserved)= 0;
667   }
668 
669   /* it's not wrong to have non-aggregated columns in a WHERE */
670   select->set_non_agg_field_used(saved_non_agg_field_used);
671 
672   thd->lex->allow_sum_func.set_bit(select->nest_level);
673 
674   save_place= thd->lex->current_select->context_analysis_place;
675   thd->lex->current_select->context_analysis_place= IN_ORDER_BY;
676   res= res || setup_order(thd, ref_pointer_array, tables, fields, all_fields,
677                           order);
678   thd->lex->allow_sum_func.clear_bit(select->nest_level);
679   thd->lex->current_select->context_analysis_place= IN_GROUP_BY;
680   res= res || setup_group(thd, ref_pointer_array, tables, fields, all_fields,
681                           group, hidden_group_fields);
682   thd->lex->current_select->context_analysis_place= save_place;
683   thd->lex->allow_sum_func.set_bit(select->nest_level);
684   res= res || setup_windows(thd, ref_pointer_array, tables, fields, all_fields,
685                             win_specs, win_funcs);
686   thd->lex->allow_sum_func= save_allow_sum_func;
687   DBUG_RETURN(res);
688 }
689 
init_from_sysvar(THD * thd)690 bool vers_select_conds_t::init_from_sysvar(THD *thd)
691 {
692   vers_asof_timestamp_t &in= thd->variables.vers_asof_timestamp;
693   type= (vers_system_time_t) in.type;
694   delete_history= false;
695   start.unit= VERS_TIMESTAMP;
696   if (type != SYSTEM_TIME_UNSPECIFIED && type != SYSTEM_TIME_ALL)
697   {
698     DBUG_ASSERT(type == SYSTEM_TIME_AS_OF);
699     MYSQL_TIME ltime;
700     thd->variables.time_zone->gmt_sec_to_TIME(&ltime, in.unix_time);
701     ltime.second_part = in.second_part;
702 
703     start.item= new (thd->mem_root)
704         Item_datetime_literal(thd, &ltime, TIME_SECOND_PART_DIGITS);
705     if (!start.item)
706       return true;
707   }
708   else
709     start.item= NULL;
710   end.empty();
711   return false;
712 }
713 
print(String * str,enum_query_type query_type) const714 void vers_select_conds_t::print(String *str, enum_query_type query_type) const
715 {
716   switch (orig_type) {
717   case SYSTEM_TIME_UNSPECIFIED:
718     break;
719   case SYSTEM_TIME_AS_OF:
720     start.print(str, query_type, STRING_WITH_LEN(" FOR SYSTEM_TIME AS OF "));
721     break;
722   case SYSTEM_TIME_FROM_TO:
723     start.print(str, query_type, STRING_WITH_LEN(" FOR SYSTEM_TIME FROM "));
724     end.print(str, query_type, STRING_WITH_LEN(" TO "));
725     break;
726   case SYSTEM_TIME_BETWEEN:
727     start.print(str, query_type, STRING_WITH_LEN(" FOR SYSTEM_TIME BETWEEN "));
728     end.print(str, query_type, STRING_WITH_LEN(" AND "));
729     break;
730   case SYSTEM_TIME_BEFORE:
731   case SYSTEM_TIME_HISTORY:
732     DBUG_ASSERT(0);
733     break;
734   case SYSTEM_TIME_ALL:
735     str->append(" FOR SYSTEM_TIME ALL");
736     break;
737   }
738 }
739 
740 static
skip_setup_conds(THD * thd)741 bool skip_setup_conds(THD *thd)
742 {
743   return (!thd->stmt_arena->is_conventional()
744           && !thd->stmt_arena->is_stmt_prepare_or_first_sp_execute())
745          || thd->lex->is_view_context_analysis();
746 }
747 
/**
  System versioning: turn the FOR SYSTEM_TIME settings of the tables of this
  SELECT into ordinary conditions on their row start / row end columns.

  For every versioned, non-view table in the list the effective conditions
  are taken, in this order, from the table itself, from the nearest
  enclosing derived table that has conditions set, or (for SQLCOM_SELECT
  only) from the session variable. The resulting predicate is AND-ed into
  the table's ON expression for the SELECT-like commands listed in the
  switch below, and otherwise into the WHERE condition of this select (and
  of the join, if there is one) and into table->where.

  @param thd     current thread
  @param tables  first table of the list, linked through next_local

  @retval  0  success (also when there are no versioned tables)
  @retval -1  error; reported through my_error() for the checks made here
*/
int SELECT_LEX::vers_setup_conds(THD *thd, TABLE_LIST *tables)
{
  /* NOTE(review): the trace tag lacks the trailing 's' of the function name */
  DBUG_ENTER("SELECT_LEX::vers_setup_cond");
/* Shorthand: allocate on the current mem_root of the thread */
#define newx new (thd->mem_root)

  /* When false, units are still resolved and checked, but no Items are
     added to the conditions (see the "if (!update_conds)" below). */
  const bool update_conds= !skip_setup_conds(thd);
  TABLE_LIST *table;

  /* Count the versioned tables, unless a count is already stored. A table
     that carries FOR SYSTEM_TIME but is not versioned is an error. */
  if (!versioned_tables)
  {
    for (table= tables; table; table= table->next_local)
    {
      if (table->table && table->table->versioned())
        versioned_tables++;
      else if (table->vers_conditions.is_set() &&
              (table->is_non_derived() || !table->vers_conditions.used))
      {
        my_error(ER_VERS_NOT_VERSIONED, MYF(0), table->alias.str);
        DBUG_RETURN(-1);
      }
    }
  }

  if (versioned_tables == 0)
    DBUG_RETURN(0);

  /* For prepared statements we create items on statement arena,
     because they must outlive execution phase for multiple executions. */
  Query_arena_stmt on_stmt_arena(thd);

  // find outer system_time
  SELECT_LEX *outer_slex= outer_select();
  TABLE_LIST* outer_table= NULL;

  if (outer_slex)
  {
    TABLE_LIST* derived= master_unit()->derived;
    // inner SELECT may not be a derived table (derived == NULL)
    /* Walk outwards until a derived table with conditions set is found */
    while (derived && outer_slex && !derived->vers_conditions.is_set())
    {
      derived= outer_slex->master_unit()->derived;
      outer_slex= outer_slex->outer_select();
    }
    if (derived && outer_slex)
    {
      DBUG_ASSERT(derived->vers_conditions.is_set());
      outer_table= derived;
    }
  }

  /*
    is_select:  the predicate goes into table->on_expr instead of WHERE.
    use_sysvar: conditions may be initialized from the session variable;
                only plain SELECT enables it.
  */
  bool is_select= false;
  bool use_sysvar= false;
  switch (thd->lex->sql_command)
  {
  case SQLCOM_SELECT:
    use_sysvar= true;
    /* fall through */
  case SQLCOM_CREATE_TABLE:
  case SQLCOM_INSERT_SELECT:
  case SQLCOM_REPLACE_SELECT:
  case SQLCOM_DELETE_MULTI:
  case SQLCOM_UPDATE_MULTI:
    is_select= true;
    /* falls into default, which only breaks */
  default:
    break;
  }

  for (table= tables; table; table= table->next_local)
  {
    /* Only opened, versioned base tables are handled here */
    if (!table->table || table->is_view() || !table->table->versioned())
      continue;

    vers_select_conds_t &vers_conditions= table->vers_conditions;

#ifdef WITH_PARTITION_STORAGE_ENGINE
      /*
        if the history is stored in partitions, then partitions
        themselves are not versioned
      */
      if (table->partition_names && table->table->part_info->vers_info)
      {
        /* If the history is stored in partitions, then partitions
            themselves are not versioned. */
        if (vers_conditions.was_set())
        {
          my_error(ER_VERS_QUERY_IN_PARTITION, MYF(0), table->alias.str);
          DBUG_RETURN(-1);
        }
        else if (!vers_conditions.is_set())
          vers_conditions.type= SYSTEM_TIME_ALL;
      }
#endif

    if (outer_table && !vers_conditions.is_set())
    {
      // propagate system_time from nearest outer SELECT_LEX
      vers_conditions= outer_table->vers_conditions;
      /* Marking it used suppresses ER_VERS_NOT_VERSIONED for the derived
         table in the counting loop above. */
      outer_table->vers_conditions.used= true;
    }

    // propagate system_time from sysvar
    if (!vers_conditions.is_set() && use_sysvar)
    {
      if (vers_conditions.init_from_sysvar(thd))
        DBUG_RETURN(-1);
    }

    if (vers_conditions.is_set())
    {
      /* Conditions that were set while the table is locked with a lock
         type above TL_READ_NO_INSERT are rejected, unless this is
         DELETE HISTORY. */
      if (vers_conditions.was_set() &&
          table->lock_type > TL_READ_NO_INSERT &&
          !vers_conditions.delete_history)
      {
        my_error(ER_TABLE_NOT_LOCKED_FOR_WRITE, MYF(0), table->alias.str);
        DBUG_RETURN(-1);
      }

      /* SYSTEM_TIME ALL needs no filtering; this is also the type stored
         at the end of this loop body after conditions have been added. */
      if (vers_conditions.type == SYSTEM_TIME_ALL)
        continue;
    }

    const LEX_CSTRING *fstart=
        thd->make_clex_string(table->table->vers_start_field()->field_name);
    const LEX_CSTRING *fend=
        thd->make_clex_string(table->table->vers_end_field()->field_name);

    /* Column references to the row start / row end fields of this table */
    Item *row_start=
        newx Item_field(thd, &this->context, table->db.str, table->alias.str, fstart);
    Item *row_end=
        newx Item_field(thd, &this->context, table->db.str, table->alias.str, fend);

    bool timestamps_only= table->table->versioned(VERS_TIMESTAMP);

    if (vers_conditions.is_set() && vers_conditions.type != SYSTEM_TIME_HISTORY)
    {
      thd->where= "FOR SYSTEM_TIME";
      /* TODO: do resolve fix_length_and_dec(), fix_fields(). This requires
        storing vers_conditions as Item and make some magic related to
        vers_system_time_t/VERS_TRX_ID at stage of fix_fields()
        (this is large refactoring). */
      if (vers_conditions.resolve_units(thd))
        DBUG_RETURN(-1);
      /* A timestamp-versioned table cannot be queried by transaction id */
      if (timestamps_only && (vers_conditions.start.unit == VERS_TRX_ID ||
        vers_conditions.end.unit == VERS_TRX_ID))
      {
        my_error(ER_VERS_ENGINE_UNSUPPORTED, MYF(0), table->table_name.str);
        DBUG_RETURN(-1);
      }
    }

    if (!update_conds)
      continue;

    /*
      cond1..cond3 are AND-ed together below; unused ones stay NULL.
      curr is the "maximum" row end value compared against for
      SYSTEM_TIME_UNSPECIFIED and SYSTEM_TIME_HISTORY.
    */
    Item *cond1= NULL, *cond2= NULL, *cond3= NULL, *curr= NULL;
    Item *point_in_time1= vers_conditions.start.item;
    Item *point_in_time2= vers_conditions.end.item;
    TABLE *t= table->table;
    if (t->versioned(VERS_TIMESTAMP))
    {
      /* Timestamp-based versioning: compare row_start/row_end directly */
      MYSQL_TIME max_time;
      switch (vers_conditions.type)
      {
      case SYSTEM_TIME_UNSPECIFIED:
      case SYSTEM_TIME_HISTORY:
        thd->variables.time_zone->gmt_sec_to_TIME(&max_time, TIMESTAMP_MAX_VALUE);
        max_time.second_part= TIME_MAX_SECOND_PART;
        curr= newx Item_datetime_literal(thd, &max_time, TIME_SECOND_PART_DIGITS);
        /* row_end = max for UNSPECIFIED, row_end < max for HISTORY */
        if (vers_conditions.type == SYSTEM_TIME_UNSPECIFIED)
          cond1= newx Item_func_eq(thd, row_end, curr);
        else
          cond1= newx Item_func_lt(thd, row_end, curr);
        break;
      case SYSTEM_TIME_AS_OF:
        /* row_start <= t AND row_end > t */
        cond1= newx Item_func_le(thd, row_start, point_in_time1);
        cond2= newx Item_func_gt(thd, row_end, point_in_time1);
        break;
      case SYSTEM_TIME_FROM_TO:
        /* row_start < t2 AND row_end > t1 AND t1 < t2 */
        cond1= newx Item_func_lt(thd, row_start, point_in_time2);
        cond2= newx Item_func_gt(thd, row_end, point_in_time1);
        cond3= newx Item_func_lt(thd, point_in_time1, point_in_time2);
        break;
      case SYSTEM_TIME_BETWEEN:
        /* row_start <= t2 AND row_end > t1 AND t1 <= t2 */
        cond1= newx Item_func_le(thd, row_start, point_in_time2);
        cond2= newx Item_func_gt(thd, row_end, point_in_time1);
        cond3= newx Item_func_le(thd, point_in_time1, point_in_time2);
        break;
      case SYSTEM_TIME_BEFORE:
        cond1= newx Item_func_history(thd, row_end);
        cond2= newx Item_func_lt(thd, row_end, point_in_time1);
        break;
      default:
        DBUG_ASSERT(0);
      }
    }
    else
    {
      /* Not timestamp-based: the conditions are built from the trt_*
         functions; a point given as a timestamp is first wrapped in
         Item_func_trt_id with TR_table::FLD_TRX_ID. */
      DBUG_ASSERT(table->table->s && table->table->s->db_plugin);

      Item *trx_id0, *trx_id1;

      switch (vers_conditions.type)
      {
      case SYSTEM_TIME_UNSPECIFIED:
      case SYSTEM_TIME_HISTORY:
        curr= newx Item_int(thd, ULONGLONG_MAX);
        if (vers_conditions.type == SYSTEM_TIME_UNSPECIFIED)
          cond1= newx Item_func_eq(thd, row_end, curr);
        else
          cond1= newx Item_func_lt(thd, row_end, curr);
        break;
      case SYSTEM_TIME_AS_OF:
        trx_id0= vers_conditions.start.unit == VERS_TIMESTAMP
          ? newx Item_func_trt_id(thd, point_in_time1, TR_table::FLD_TRX_ID)
          : point_in_time1;
        cond1= newx Item_func_trt_trx_sees_eq(thd, trx_id0, row_start);
        cond2= newx Item_func_trt_trx_sees(thd, row_end, trx_id0);
        break;
      case SYSTEM_TIME_FROM_TO:
	cond3= newx Item_func_lt(thd, point_in_time1, point_in_time2);
        /* fall through */
      case SYSTEM_TIME_BETWEEN:
        trx_id0= vers_conditions.start.unit == VERS_TIMESTAMP
          ? newx Item_func_trt_id(thd, point_in_time1, TR_table::FLD_TRX_ID, true)
          : point_in_time1;
        trx_id1= vers_conditions.end.unit == VERS_TIMESTAMP
          ? newx Item_func_trt_id(thd, point_in_time2, TR_table::FLD_TRX_ID, false)
          : point_in_time2;
        cond1= vers_conditions.type == SYSTEM_TIME_FROM_TO
          ? newx Item_func_trt_trx_sees(thd, trx_id1, row_start)
          : newx Item_func_trt_trx_sees_eq(thd, trx_id1, row_start);
        cond2= newx Item_func_trt_trx_sees_eq(thd, row_end, trx_id0);
	/* cond3 is still NULL only when entered as SYSTEM_TIME_BETWEEN */
	if (!cond3)
	  cond3= newx Item_func_le(thd, point_in_time1, point_in_time2);
        break;
      case SYSTEM_TIME_BEFORE:
        trx_id0= vers_conditions.start.unit == VERS_TIMESTAMP
          ? newx Item_func_trt_id(thd, point_in_time1, TR_table::FLD_TRX_ID, true)
          : point_in_time1;
        cond1= newx Item_func_history(thd, row_end);
        cond2= newx Item_func_trt_trx_sees(thd, trx_id0, row_end);
        break;
      default:
        DBUG_ASSERT(0);
      }
    }

    if (cond1)
    {
      /* Combine the partial conditions into cond1 */
      cond1= and_items(thd, cond2, cond1);
      cond1= and_items(thd, cond3, cond1);
      if (is_select)
        table->on_expr= and_items(thd, table->on_expr, cond1);
      else
      {
        /* Keep the WHERE of this select and join->conds in step */
        if (join)
        {
          where= and_items(thd, join->conds, cond1);
          join->conds= where;
        }
        else
          where= and_items(thd, where, cond1);
        table->where= and_items(thd, table->where, cond1);
      }
    }

    /* From now on the table is treated as SYSTEM_TIME ALL, so the
       "continue" above skips it if conditions are set up again. */
    table->vers_conditions.type= SYSTEM_TIME_ALL;
  } // for (table= tables; ...)

  DBUG_RETURN(0);
#undef newx
}
1019 
1020 /*****************************************************************************
1021   Check fields, find best join, do the select and output fields.
1022   mysql_select assumes that all tables are already opened
1023 *****************************************************************************/
1024 
1025 
/**
  Prepare of whole select (including sub queries in future).

  Stores the query parts in the JOIN, prepares derived tables, sets up and
  checks tables, resolves the select list, WHERE, ORDER BY, GROUP BY,
  window functions and HAVING, performs the IN-subquery rewrites, sets up
  an optional PROCEDURE, prepares the result sink and finally calls
  prepare_stage2().

  Does nothing and returns 0 if optimization_state is no longer
  JOIN::NOT_OPTIMIZED.

  @param tables_init      table list; stored in tables_list
  @param wild_num         passed to setup_wild()
  @param conds_init       WHERE condition; stored in conds
  @param og_num           base size passed to setup_ref_array(); the number
                          of elements of the select's order_list is added
                          when that ORDER BY is skipped
  @param order_init       ORDER BY list; stored in order
  @param skip_order_by    if true and this select is not the unit's
                          global_parameters(), the select's own order_list
                          is only resolved for validity and then emptied
  @param group_init       GROUP BY list; stored in group_list
  @param having_init      HAVING condition; stored in having
  @param proc_param_init  PROCEDURE parameters; passed to setup_procedure()
  @param select_lex_arg   the SELECT_LEX this JOIN belongs to
  @param unit_arg         the unit; stored in this->unit before
                          prepare_stage2() is called

  @todo
    Add check of calculation of GROUP functions and fields:
    SELECT COUNT(*)+table.col1 from table1;

  @retval
    -1   on error
  @retval
    0   on success

  @note NOTE(review): two paths return other non-zero values: the WITH
  clause check returns 1, and a non-zero result of
  check_and_do_in_subquery_rewrites() is returned as is.
*/
int
JOIN::prepare(TABLE_LIST *tables_init,
	      uint wild_num, COND *conds_init, uint og_num,
	      ORDER *order_init, bool skip_order_by,
              ORDER *group_init, Item *having_init,
	      ORDER *proc_param_init, SELECT_LEX *select_lex_arg,
	      SELECT_LEX_UNIT *unit_arg)
{
  DBUG_ENTER("JOIN::prepare");

  // to prevent double initialization on EXPLAIN
  if (optimization_state != JOIN::NOT_OPTIMIZED)
    DBUG_RETURN(0);

  conds= conds_init;
  order= order_init;
  group_list= group_init;
  having= having_init;
  proc_param= proc_param_init;
  tables_list= tables_init;
  select_lex= select_lex_arg;
  select_lex->join= this;
  join_list= &select_lex->top_join_list;
  union_part= unit_arg->is_unit_op();

  // simple check that we got usable conds
  dbug_print_item(conds);

  if (select_lex->handle_derived(thd->lex, DT_PREPARE))
    DBUG_RETURN(-1);

  thd->lex->current_select->context_analysis_place= NO_MATTER;
  thd->lex->current_select->is_item_list_lookup= 1;
  /*
    If we have already executed SELECT, then it makes no sense to prevent
    its table from update (see unique_table())
    Affects only materialized derived tables.
  */
  /* Check that all tables, fields, conds and order are ok */
  if (!(select_options & OPTION_SETUP_TABLES_DONE) &&
      setup_tables_and_check_access(thd, &select_lex->context, join_list,
                                    tables_list, select_lex->leaf_tables,
                                    FALSE, SELECT_ACL, SELECT_ACL, FALSE))
      DBUG_RETURN(-1);

  /* System Versioning: handle FOR SYSTEM_TIME clause. */
  if (select_lex->vers_setup_conds(thd, tables_list) < 0)
    DBUG_RETURN(-1);

  /*
    TRUE if the SELECT list mixes elements with and without grouping,
    and there is no GROUP BY clause. Mixing non-aggregated fields with
    aggregate functions in the SELECT list is a MySQL extension that
    is allowed only if the ONLY_FULL_GROUP_BY sql mode is not set.
  */
  mixed_implicit_grouping= false;
  if ((~thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY) &&
      select_lex->with_sum_func && !group_list)
  {
    List_iterator_fast <Item> select_it(fields_list);
    Item *select_el; /* Element of the SELECT clause, can be an expression. */
    bool found_field_elem= false;
    bool found_sum_func_elem= false;

    /* Stop as soon as both kinds of element have been seen */
    while ((select_el= select_it++))
    {
      if (select_el->with_sum_func)
        found_sum_func_elem= true;
      if (select_el->with_field)
        found_field_elem= true;
      if (found_sum_func_elem && found_field_elem)
      {
        mixed_implicit_grouping= true;
        break;
      }
    }
  }

  table_count= select_lex->leaf_tables.elements;

  TABLE_LIST *tbl;
  List_iterator_fast<TABLE_LIST> li(select_lex->leaf_tables);
  while ((tbl= li++))
  {
    /*
      If the query uses implicit grouping where the select list contains both
      aggregate functions and non-aggregate fields, any non-aggregated field
      may produce a NULL value. Set all fields of each table as nullable before
      semantic analysis to take into account this change of nullability.

      Note: this loop doesn't touch tables inside merged semi-joins, because
      subquery-to-semijoin conversion has not been done yet. This is intended.
    */
    if (mixed_implicit_grouping && tbl->table)
      tbl->table->maybe_null= 1;
  }

  /* Reserve room in the ref array for the ORDER BY that is only resolved
     and then dropped further below. */
  uint real_og_num= og_num;
  if (skip_order_by &&
      select_lex != select_lex->master_unit()->global_parameters())
    real_og_num+= select_lex->order_list.elements;

  DBUG_ASSERT(select_lex->hidden_bit_fields == 0);
  if (setup_wild(thd, tables_list, fields_list, &all_fields, wild_num,
                 &select_lex->hidden_bit_fields))
    DBUG_RETURN(-1);
  if (select_lex->setup_ref_array(thd, real_og_num))
    DBUG_RETURN(-1);

  ref_ptrs= ref_ptr_array_slice(0);

  /* Resolve the select list with the parsing place set to SELECT_LIST.
     NOTE(review): the saved place is not restored on the error return. */
  enum_parsing_place save_place=
                     thd->lex->current_select->context_analysis_place;
  thd->lex->current_select->context_analysis_place= SELECT_LIST;
  if (setup_fields(thd, ref_ptrs, fields_list, MARK_COLUMNS_READ,
                   &all_fields, &select_lex->pre_fix, 1))
    DBUG_RETURN(-1);
  thd->lex->current_select->context_analysis_place= save_place;

  /* WHERE, ORDER BY, GROUP BY and window specifications */
  if (setup_without_group(thd, ref_ptrs, tables_list,
                          select_lex->leaf_tables, fields_list,
                          all_fields, &conds, order, group_list,
                          select_lex->window_specs,
                          select_lex->window_funcs,
                          &hidden_group_fields,
                          &select_lex->select_n_reserved))
    DBUG_RETURN(-1);

  /*
    Permanently remove redundant parts from the query if
      1) This is a subquery
      2) This is the first time this query is optimized (since the
         transformation is permanent)
      3) Not normalizing a view. Removal should take place when a
         query involving a view is optimized, not when the view
         is created
  */
  if (select_lex->master_unit()->item &&                               // 1)
      select_lex->first_cond_optimization &&                           // 2)
      !thd->lex->is_view_context_analysis())                           // 3)
  {
    remove_redundant_subquery_clauses(select_lex);
  }

  /* Resolve the ORDER BY that was skipped, then remove it. */
  if (skip_order_by && select_lex !=
                       select_lex->master_unit()->global_parameters())
  {
    /* NOTE(review): allow_sum_func is not restored on the error return */
    nesting_map save_allow_sum_func= thd->lex->allow_sum_func;
    thd->lex->allow_sum_func.set_bit(select_lex->nest_level);
    thd->where= "order clause";
    for (ORDER *order= select_lex->order_list.first; order; order= order->next)
    {
      /* Don't add the order items to all fields. Just resolve them to ensure
         the query is valid, we'll drop them immediately after. */
      if (find_order_in_list(thd, ref_ptrs, tables_list, order,
                             fields_list, all_fields, false, false, false))
        DBUG_RETURN(-1);
    }
    thd->lex->allow_sum_func= save_allow_sum_func;
    select_lex->order_list.empty();
  }

  if (having)
  {
    /* Aggregate functions of this nesting level are allowed in HAVING.
       NOTE(review): allow_sum_func is not restored on the error return. */
    nesting_map save_allow_sum_func= thd->lex->allow_sum_func;
    thd->where="having clause";
    thd->lex->allow_sum_func.set_bit(select_lex_arg->nest_level);
    select_lex->having_fix_field= 1;
    /*
      Wrap a lone field in the HAVING clause in case it will be an outer
      field of a subquery which needs a persistent pointer to it, while
      having could be changed by the optimizer
    */
    if (having->type() == Item::REF_ITEM &&
        ((Item_ref *)having)->ref_type() == Item_ref::REF)
      wrap_ident(thd, &having);
    bool having_fix_rc= having->fix_fields_if_needed_for_bool(thd, &having);
    select_lex->having_fix_field= 0;

    if (unlikely(having_fix_rc || thd->is_error()))
      DBUG_RETURN(-1);				/* purecov: inspected */
    thd->lex->allow_sum_func= save_allow_sum_func;

    /* Window functions are rejected in HAVING */
    if (having->with_window_func)
    {
      my_error(ER_WRONG_PLACEMENT_OF_WINDOW_FUNCTION, MYF(0));
      DBUG_RETURN(-1);
    }
  }

  /*
     After setting up window functions, we may have discovered additional
     used tables from the PARTITION BY and ORDER BY list. Update all items
     that contain window functions.
  */
  if (select_lex->have_window_funcs())
  {
    List_iterator_fast<Item> it(select_lex->item_list);
    Item *item;
    while ((item= it++))
    {
      if (item->with_window_func)
        item->update_used_tables();
    }
  }

  /* NOTE(review): this path returns 1, unlike the -1 used elsewhere */
  With_clause *with_clause=select_lex->get_with_clause();
  if (with_clause && with_clause->prepare_unreferenced_elements(thd))
    DBUG_RETURN(1);

  With_element *with_elem= select_lex->get_with_element();
  if (with_elem &&
      select_lex->check_unrestricted_recursive(
                      thd->variables.only_standard_compliant_cte))
    DBUG_RETURN(-1);
  if (!(select_lex->changed_elements & TOUCHED_SEL_COND))
    select_lex->check_subqueries_with_recursive_references();

  /* fix_prepare_information() is called even when the rewrite failed;
     the failure is reported only afterwards. */
  int res= check_and_do_in_subquery_rewrites(this);

  select_lex->fix_prepare_information(thd, &conds, &having);

  if (res)
    DBUG_RETURN(res);

  if (order)
  {
    /* Becomes TRUE once an element that carries data to sort is found */
    bool real_order= FALSE;
    ORDER *ord;
    for (ord= order; ord; ord= ord->next)
    {
      Item *item= *ord->item;
      /*
        Disregard sort order if there's only
        zero length NOT NULL fields (e.g. {VAR}CHAR(0) NOT NULL") or
        zero length NOT NULL string functions there.
        Such tuples don't contain any data to sort.
      */
      if (!real_order &&
           /* Not a zero length NOT NULL field */
          ((item->type() != Item::FIELD_ITEM ||
            ((Item_field *) item)->field->maybe_null() ||
            ((Item_field *) item)->field->sort_length()) &&
           /* AND not a zero length NOT NULL string function. */
           (item->type() != Item::FUNC_ITEM ||
            item->maybe_null ||
            item->result_type() != STRING_RESULT ||
            item->max_length)))
        real_order= TRUE;

      if ((item->with_sum_func && item->type() != Item::SUM_FUNC_ITEM) ||
          item->with_window_func)
        item->split_sum_func(thd, ref_ptrs, all_fields, SPLIT_SUM_SELECT);
    }
    if (!real_order)
      order= NULL;
  }

  if (having && having->with_sum_func)
    having->split_sum_func2(thd, ref_ptrs, all_fields,
                            &having, SPLIT_SUM_SKIP_REGISTERED);
  if (select_lex->inner_sum_func_list)
  {
    /* The list is walked as a ring: start after 'end' and stop once
       'end' itself has been processed. */
    Item_sum *end=select_lex->inner_sum_func_list;
    Item_sum *item_sum= end;
    do
    {
      item_sum= item_sum->next;
      item_sum->split_sum_func2(thd, ref_ptrs,
                                all_fields, item_sum->ref_by, 0);
    } while (item_sum != end);
  }

  if (select_lex->inner_refs_list.elements &&
      fix_inner_refs(thd, all_fields, select_lex, ref_ptrs))
    DBUG_RETURN(-1);

  if (group_list)
  {
    /*
      Because HEAP tables can't index BIT fields we need to use an
      additional hidden field for grouping because later it will be
      converted to a LONG field. Original field will remain of the
      BIT type and will be returned to a client.
    */
    for (ORDER *ord= group_list; ord; ord= ord->next)
    {
      if ((*ord->item)->type() == Item::FIELD_ITEM &&
          (*ord->item)->field_type() == MYSQL_TYPE_BIT)
      {
        Item_field *field= new (thd->mem_root) Item_field(thd, *(Item_field**)ord->item);
        if (!field)
          DBUG_RETURN(-1);
        /* The copy takes the ref slot at index all_fields.elements and is
           put at the front of all_fields; GROUP BY then refers to it. */
        int el= all_fields.elements;
        ref_ptrs[el]= field;
        all_fields.push_front(field, thd->mem_root);
        ord->item= &ref_ptrs[el];
      }
    }
  }

  /*
    Check if there are references to un-aggregated columns when computing
    aggregate functions with implicit grouping (there is no GROUP BY).
  */
  if (thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY && !group_list &&
      !(select_lex->master_unit()->item &&
        select_lex->master_unit()->item->is_in_predicate() &&
        ((Item_in_subselect*)select_lex->master_unit()->item)->
        test_set_strategy(SUBS_MAXMIN_INJECTED)) &&
      select_lex->non_agg_field_used() &&
      select_lex->agg_func_used())
  {
    my_message(ER_MIX_OF_GROUP_FUNC_AND_FIELDS,
               ER_THD(thd, ER_MIX_OF_GROUP_FUNC_AND_FIELDS), MYF(0));
    DBUG_RETURN(-1);
  }
  {
    /* Calculate the number of groups */
    send_group_parts= 0;
    for (ORDER *group_tmp= group_list ; group_tmp ; group_tmp= group_tmp->next)
      send_group_parts++;
  }

  /* From here on errors go through the err label, which deletes the
     procedure object. */
  procedure= setup_procedure(thd, proc_param, result, fields_list, &error);
  if (unlikely(error))
    goto err;					/* purecov: inspected */
  if (procedure)
  {
    if (setup_new_fields(thd, fields_list, all_fields,
			 procedure->param_fields))
	goto err;				/* purecov: inspected */
    if (procedure->group)
    {
      if (!test_if_subpart(procedure->group,group_list))
      {						/* purecov: inspected */
	my_message(ER_DIFF_GROUPS_PROC, ER_THD(thd, ER_DIFF_GROUPS_PROC),
                   MYF(0));                     /* purecov: inspected */
	goto err;				/* purecov: inspected */
      }
    }
    if (order && (procedure->flags & PROC_NO_SORT))
    {						/* purecov: inspected */
      my_message(ER_ORDER_WITH_PROC, ER_THD(thd, ER_ORDER_WITH_PROC),
                 MYF(0));                       /* purecov: inspected */
      goto err;					/* purecov: inspected */
    }
    if (thd->lex->derived_tables)
    {
      /*
        Queries with derived tables and PROCEDURE are not allowed.
        Many of such queries are disallowed grammatically, but there
        are still some complex cases:
          SELECT 1 FROM (SELECT 1) a PROCEDURE ANALYSE()
      */
      my_error(ER_WRONG_USAGE, MYF(0), "PROCEDURE",
               thd->lex->derived_tables & DERIVED_VIEW ?
               "view" : "subquery");
      goto err;
    }
    if (thd->lex->sql_command != SQLCOM_SELECT)
    {
      // EXPLAIN SELECT * FROM t1 PROCEDURE ANALYSE()
      my_error(ER_WRONG_USAGE, MYF(0), "PROCEDURE", "non-SELECT");
      goto err;
    }
  }

  /* Without a procedure the result sink is prepared here */
  if (!procedure && result && result->prepare(fields_list, unit_arg))
    goto err;					/* purecov: inspected */

  unit= unit_arg;
  if (prepare_stage2())
    goto err;

  DBUG_RETURN(0); // All OK

err:
  delete procedure;                /* purecov: inspected */
  procedure= 0;
  DBUG_RETURN(-1);                /* purecov: inspected */
}
1421 
1422 
1423 /**
1424   Second phase of prepare where we collect some statistic.
1425 
1426   @details
1427   We made this part separate to be able recalculate some statistic after
1428   transforming subquery on optimization phase.
1429 */
1430 
prepare_stage2()1431 bool JOIN::prepare_stage2()
1432 {
1433   bool res= TRUE;
1434   DBUG_ENTER("JOIN::prepare_stage2");
1435 
1436   /* Init join struct */
1437   count_field_types(select_lex, &tmp_table_param, all_fields, 0);
1438   this->group= group_list != 0;
1439 
1440   if (tmp_table_param.sum_func_count && !group_list)
1441   {
1442     implicit_grouping= TRUE;
1443     // Result will contain zero or one row - ordering is meaningless
1444     order= NULL;
1445   }
1446 
1447 #ifdef RESTRICTED_GROUP
1448   if (implicit_grouping)
1449   {
1450     my_message(ER_WRONG_SUM_SELECT,ER_THD(thd, ER_WRONG_SUM_SELECT),MYF(0));
1451     goto err;
1452   }
1453 #endif
1454   if (select_lex->olap == ROLLUP_TYPE && rollup_init())
1455     goto err;
1456   if (alloc_func_list())
1457     goto err;
1458 
1459   res= FALSE;
1460 err:
1461   DBUG_RETURN(res);				/* purecov: inspected */
1462 }
1463 
1464 
build_explain()1465 bool JOIN::build_explain()
1466 {
1467   have_query_plan= QEP_AVAILABLE;
1468 
1469   /*
1470     explain data must be created on the Explain_query::mem_root. Because it's
1471     just a memroot, not an arena, explain data must not contain any Items
1472   */
1473   MEM_ROOT *old_mem_root= thd->mem_root;
1474   Item *old_free_list __attribute__((unused))= thd->free_list;
1475   thd->mem_root= thd->lex->explain->mem_root;
1476   bool res= save_explain_data(thd->lex->explain, false /* can overwrite */,
1477                         need_tmp,
1478                         !skip_sort_order && !no_order && (order || group_list),
1479                         select_distinct);
1480   thd->mem_root= old_mem_root;
1481   DBUG_ASSERT(thd->free_list == old_free_list); // no Items were created
1482   if (res)
1483     return 1;
1484 
1485   uint select_nr= select_lex->select_number;
1486   JOIN_TAB *curr_tab= join_tab + exec_join_tab_cnt();
1487   for (uint i= 0; i < aggr_tables; i++, curr_tab++)
1488   {
1489     if (select_nr == INT_MAX)
1490     {
1491       /* this is a fake_select_lex of a union */
1492       select_nr= select_lex->master_unit()->first_select()->select_number;
1493       curr_tab->tracker= thd->lex->explain->get_union(select_nr)->
1494                          get_tmptable_read_tracker();
1495     }
1496     else
1497     {
1498       curr_tab->tracker= thd->lex->explain->get_select(select_nr)->
1499                          get_using_temporary_read_tracker();
1500     }
1501   }
1502   return 0;
1503 }
1504 
1505 
optimize()1506 int JOIN::optimize()
1507 {
1508   int res= 0;
1509   create_explain_query_if_not_exists(thd->lex, thd->mem_root);
1510   join_optimization_state init_state= optimization_state;
1511   if (optimization_state == JOIN::OPTIMIZATION_PHASE_1_DONE)
1512     res= optimize_stage2();
1513   else
1514   {
1515     // to prevent double initialization on EXPLAIN
1516     if (optimization_state != JOIN::NOT_OPTIMIZED)
1517       return FALSE;
1518     optimization_state= JOIN::OPTIMIZATION_IN_PROGRESS;
1519     res= optimize_inner();
1520   }
1521   if (!with_two_phase_optimization ||
1522       init_state == JOIN::OPTIMIZATION_PHASE_1_DONE)
1523   {
1524     if (!res && have_query_plan != QEP_DELETED)
1525       res= build_explain();
1526     optimization_state= JOIN::OPTIMIZATION_DONE;
1527   }
1528   return res;
1529 }
1530 
1531 
init_join_caches()1532 int JOIN::init_join_caches()
1533 {
1534   JOIN_TAB *tab;
1535 
1536   for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
1537        tab;
1538        tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
1539   {
1540     TABLE *table= tab->table;
1541     if (table->file->keyread_enabled())
1542     {
1543       if (!(table->file->index_flags(table->file->keyread, 0, 1) & HA_CLUSTERED_INDEX))
1544         table->mark_index_columns(table->file->keyread, table->read_set);
1545     }
1546     else if ((tab->read_first_record == join_read_first ||
1547               tab->read_first_record == join_read_last) &&
1548              !tab->filesort && table->covering_keys.is_set(tab->index) &&
1549              !table->no_keyread)
1550     {
1551       table->prepare_for_keyread(tab->index, table->read_set);
1552     }
1553     if (tab->cache && tab->cache->init(select_options & SELECT_DESCRIBE))
1554       revise_cache_usage(tab);
1555     else
1556       tab->remove_redundant_bnl_scan_conds();
1557   }
1558   return 0;
1559 }
1560 
1561 
1562 /**
1563   global select optimisation.
1564 
1565   @note
1566     error code saved in field 'error'
1567 
1568   @retval
1569     0   success
1570   @retval
1571     1   error
1572 */
1573 
1574 int
optimize_inner()1575 JOIN::optimize_inner()
1576 {
1577   DBUG_ENTER("JOIN::optimize_inner");
1578   subq_exit_fl= false;
1579   do_send_rows = (unit->select_limit_cnt) ? 1 : 0;
1580 
1581   DEBUG_SYNC(thd, "before_join_optimize");
1582 
1583   THD_STAGE_INFO(thd, stage_optimizing);
1584 
1585   set_allowed_join_cache_types();
1586   need_distinct= TRUE;
1587 
1588   /*
1589     Needed in case optimizer short-cuts,
1590     set properly in make_aggr_tables_info()
1591   */
1592   fields= &select_lex->item_list;
1593 
1594   if (select_lex->first_cond_optimization)
1595   {
1596     //Do it only for the first execution
1597     /* Merge all mergeable derived tables/views in this SELECT. */
1598     if (select_lex->handle_derived(thd->lex, DT_MERGE))
1599       DBUG_RETURN(TRUE);
1600     table_count= select_lex->leaf_tables.elements;
1601   }
1602 
1603   if (select_lex->first_cond_optimization &&
1604       transform_in_predicates_into_in_subq(thd))
1605     DBUG_RETURN(1);
1606 
1607   // Update used tables after all handling derived table procedures
1608   select_lex->update_used_tables();
1609 
1610   /*
1611     In fact we transform underlying subqueries after their 'prepare' phase and
1612     before 'optimize' from upper query 'optimize' to allow semijoin
1613     conversion happened (which done in the same way.
1614   */
1615   if (select_lex->first_cond_optimization &&
1616       conds && conds->walk(&Item::exists2in_processor, 0, thd))
1617     DBUG_RETURN(1);
1618   /*
1619     TODO
1620     make view to decide if it is possible to write to WHERE directly or make Semi-Joins able to process ON condition if it is possible
1621   for (TABLE_LIST *tbl= tables_list; tbl; tbl= tbl->next_local)
1622   {
1623     if (tbl->on_expr &&
1624         tbl->on_expr->walk(&Item::exists2in_processor, 0, thd))
1625       DBUG_RETURN(1);
1626   }
1627   */
1628 
1629   if (transform_max_min_subquery())
1630     DBUG_RETURN(1); /* purecov: inspected */
1631 
1632   if (select_lex->first_cond_optimization)
1633   {
1634     /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
1635     if (convert_join_subqueries_to_semijoins(this))
1636       DBUG_RETURN(1); /* purecov: inspected */
1637     /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
1638     select_lex->update_used_tables();
1639   }
1640 
1641   eval_select_list_used_tables();
1642 
1643   table_count= select_lex->leaf_tables.elements;
1644 
1645   if (select_lex->options & OPTION_SCHEMA_TABLE &&
1646       optimize_schema_tables_memory_usage(select_lex->leaf_tables))
1647     DBUG_RETURN(1);
1648 
1649   if (setup_ftfuncs(select_lex)) /* should be after having->fix_fields */
1650     DBUG_RETURN(-1);
1651 
1652   row_limit= ((select_distinct || order || group_list) ? HA_POS_ERROR :
1653 	      unit->select_limit_cnt);
1654   /* select_limit is used to decide if we are likely to scan the whole table */
1655   select_limit= unit->select_limit_cnt;
1656   if (having || (select_options & OPTION_FOUND_ROWS))
1657     select_limit= HA_POS_ERROR;
1658 #ifdef HAVE_REF_TO_FIELDS			// Not done yet
1659   /* Add HAVING to WHERE if possible */
1660   if (having && !group_list && !sum_func_count)
1661   {
1662     if (!conds)
1663     {
1664       conds= having;
1665       having= 0;
1666     }
1667     else if ((conds=new (thd->mem_root) Item_cond_and(conds,having)))
1668     {
1669       /*
1670         Item_cond_and can't be fixed after creation, so we do not check
1671         conds->fixed
1672       */
1673       conds->fix_fields(thd, &conds);
1674       conds->change_ref_to_fields(thd, tables_list);
1675       conds->top_level_item();
1676       having= 0;
1677     }
1678   }
1679 #endif
1680 
1681   SELECT_LEX *sel= select_lex;
1682   if (sel->first_cond_optimization)
1683   {
1684     /*
1685       The following code will allocate the new items in a permanent
1686       MEMROOT for prepared statements and stored procedures.
1687 
1688       But first we need to ensure that thd->lex->explain is allocated
1689       in the execution arena
1690     */
1691     create_explain_query_if_not_exists(thd->lex, thd->mem_root);
1692 
1693     Query_arena *arena, backup;
1694     arena= thd->activate_stmt_arena_if_needed(&backup);
1695 
1696     sel->first_cond_optimization= 0;
1697 
1698     /* Convert all outer joins to inner joins if possible */
1699     conds= simplify_joins(this, join_list, conds, TRUE, FALSE);
1700     if (thd->is_error() || select_lex->save_leaf_tables(thd))
1701     {
1702       if (arena)
1703         thd->restore_active_arena(arena, &backup);
1704       DBUG_RETURN(1);
1705     }
1706     build_bitmap_for_nested_joins(join_list, 0);
1707 
1708     sel->prep_where= conds ? conds->copy_andor_structure(thd) : 0;
1709 
1710     sel->where= conds;
1711 
1712     select_lex->update_used_tables();
1713 
1714     if (arena)
1715       thd->restore_active_arena(arena, &backup);
1716   }
1717 
1718   if (optimize_constant_subqueries())
1719     DBUG_RETURN(1);
1720 
1721   if (conds && conds->with_subquery())
1722     (void) conds->walk(&Item::cleanup_is_expensive_cache_processor,
1723                        0, (void *) 0);
1724   if (having && having->with_subquery())
1725     (void) having->walk(&Item::cleanup_is_expensive_cache_processor,
1726 			0, (void *) 0);
1727 
1728   if (setup_jtbm_semi_joins(this, join_list, &conds))
1729     DBUG_RETURN(1);
1730 
1731   if (select_lex->cond_pushed_into_where)
1732   {
1733     conds= and_conds(thd, conds, select_lex->cond_pushed_into_where);
1734     if (conds && conds->fix_fields(thd, &conds))
1735       DBUG_RETURN(1);
1736   }
1737   if (select_lex->cond_pushed_into_having)
1738   {
1739     having= and_conds(thd, having, select_lex->cond_pushed_into_having);
1740     if (having)
1741     {
1742       select_lex->having_fix_field= 1;
1743       select_lex->having_fix_field_for_pushed_cond= 1;
1744       if (having->fix_fields(thd, &having))
1745         DBUG_RETURN(1);
1746       select_lex->having_fix_field= 0;
1747       select_lex->having_fix_field_for_pushed_cond= 0;
1748     }
1749   }
1750 
1751   bool ignore_on_expr= false;
1752   /*
1753     PS/SP note: on_expr of versioned table can not be reallocated
1754     (see build_equal_items() below) because it can be not rebuilt
1755     at second invocation.
1756   */
1757   if (!thd->stmt_arena->is_conventional() && thd->mem_root != thd->stmt_arena->mem_root)
1758     for (TABLE_LIST *tbl= tables_list; tbl; tbl= tbl->next_local)
1759       if (tbl->table && tbl->on_expr && tbl->table->versioned())
1760       {
1761         ignore_on_expr= true;
1762         break;
1763       }
1764   conds= optimize_cond(this, conds, join_list, ignore_on_expr,
1765                        &cond_value, &cond_equal, OPT_LINK_EQUAL_FIELDS);
1766 
1767   if (thd->is_error())
1768   {
1769     error= 1;
1770     DBUG_PRINT("error",("Error from optimize_cond"));
1771     DBUG_RETURN(1);
1772   }
1773 
1774   if (optimizer_flag(thd, OPTIMIZER_SWITCH_COND_PUSHDOWN_FOR_DERIVED))
1775   {
1776     TABLE_LIST *tbl;
1777     List_iterator_fast<TABLE_LIST> li(select_lex->leaf_tables);
1778     while ((tbl= li++))
1779     {
1780       /*
1781         Do not push conditions from where into materialized inner tables
1782         of outer joins: this is not valid.
1783       */
1784       if (tbl->is_materialized_derived())
1785       {
1786         JOIN *join= tbl->get_unit()->first_select()->join;
1787         if (join &&
1788             join->optimization_state == JOIN::OPTIMIZATION_PHASE_1_DONE &&
1789             join->with_two_phase_optimization)
1790           continue;
1791         /*
1792           Do not push conditions from where into materialized inner tables
1793           of outer joins: this is not valid.
1794         */
1795         if (!tbl->is_inner_table_of_outer_join())
1796 	{
1797           if (pushdown_cond_for_derived(thd, conds, tbl))
1798 	    DBUG_RETURN(1);
1799         }
1800 	if (mysql_handle_single_derived(thd->lex, tbl, DT_OPTIMIZE))
1801 	  DBUG_RETURN(1);
1802       }
1803     }
1804   }
1805   else
1806   {
1807     /* Run optimize phase for all derived tables/views used in this SELECT. */
1808     if (select_lex->handle_derived(thd->lex, DT_OPTIMIZE))
1809       DBUG_RETURN(1);
1810   }
1811 
1812   {
1813     having= optimize_cond(this, having, join_list, TRUE,
1814                           &having_value, &having_equal);
1815 
1816     if (unlikely(thd->is_error()))
1817     {
1818       error= 1;
1819       DBUG_PRINT("error",("Error from optimize_cond"));
1820       DBUG_RETURN(1);
1821     }
1822     if (select_lex->where)
1823     {
1824       select_lex->cond_value= cond_value;
1825       if (sel->where != conds && cond_value == Item::COND_OK)
1826         thd->change_item_tree(&sel->where, conds);
1827     }
1828     if (select_lex->having)
1829     {
1830       select_lex->having_value= having_value;
1831       if (sel->having != having && having_value == Item::COND_OK)
1832         thd->change_item_tree(&sel->having, having);
1833     }
1834     if (cond_value == Item::COND_FALSE || having_value == Item::COND_FALSE ||
1835         (!unit->select_limit_cnt && !(select_options & OPTION_FOUND_ROWS)))
1836     {						/* Impossible cond */
1837       if (unit->select_limit_cnt)
1838       {
1839         DBUG_PRINT("info", (having_value == Item::COND_FALSE ?
1840                               "Impossible HAVING" : "Impossible WHERE"));
1841         zero_result_cause=  having_value == Item::COND_FALSE ?
1842                              "Impossible HAVING" : "Impossible WHERE";
1843       }
1844       else
1845       {
1846         DBUG_PRINT("info", ("Zero limit"));
1847         zero_result_cause= "Zero limit";
1848       }
1849       table_count= top_join_tab_count= 0;
1850       handle_implicit_grouping_with_window_funcs();
1851       error= 0;
1852       subq_exit_fl= true;
1853       goto setup_subq_exit;
1854     }
1855   }
1856 
1857 #ifdef WITH_PARTITION_STORAGE_ENGINE
1858   {
1859     TABLE_LIST *tbl;
1860     List_iterator_fast<TABLE_LIST> li(select_lex->leaf_tables);
1861     while ((tbl= li++))
1862     {
1863       Item **prune_cond= get_sargable_cond(this, tbl->table);
1864       tbl->table->all_partitions_pruned_away=
1865         prune_partitions(thd, tbl->table, *prune_cond);
1866     }
1867   }
1868 #endif
1869 
1870   /*
1871      Try to optimize count(*), MY_MIN() and MY_MAX() to const fields if
1872      there is implicit grouping (aggregate functions but no
1873      group_list). In this case, the result set shall only contain one
1874      row.
1875   */
1876   if (tables_list && implicit_grouping)
1877   {
1878     int res;
1879     /*
1880       opt_sum_query() returns HA_ERR_KEY_NOT_FOUND if no rows match
1881       to the WHERE conditions,
1882       or 1 if all items were resolved (optimized away),
1883       or 0, or an error number HA_ERR_...
1884 
1885       If all items were resolved by opt_sum_query, there is no need to
1886       open any tables.
1887     */
1888     if ((res=opt_sum_query(thd, select_lex->leaf_tables, all_fields, conds)))
1889     {
1890       DBUG_ASSERT(res >= 0);
1891       if (res == HA_ERR_KEY_NOT_FOUND)
1892       {
1893         DBUG_PRINT("info",("No matching min/max row"));
1894 	zero_result_cause= "No matching min/max row";
1895         table_count= top_join_tab_count= 0;
1896 	error=0;
1897         subq_exit_fl= true;
1898         handle_implicit_grouping_with_window_funcs();
1899         goto setup_subq_exit;
1900       }
1901       if (res > 1)
1902       {
1903         error= res;
1904         DBUG_PRINT("error",("Error from opt_sum_query"));
1905         DBUG_RETURN(1);
1906       }
1907 
1908       DBUG_PRINT("info",("Select tables optimized away"));
1909       if (!select_lex->have_window_funcs())
1910         zero_result_cause= "Select tables optimized away";
1911       tables_list= 0;				// All tables resolved
1912       select_lex->min_max_opt_list.empty();
1913       const_tables= top_join_tab_count= table_count;
1914       handle_implicit_grouping_with_window_funcs();
1915       /*
1916         Extract all table-independent conditions and replace the WHERE
1917         clause with them. All other conditions were computed by opt_sum_query
1918         and the MIN/MAX/COUNT function(s) have been replaced by constants,
1919         so there is no need to compute the whole WHERE clause again.
1920         Notice that make_cond_for_table() will always succeed to remove all
1921         computed conditions, because opt_sum_query() is applicable only to
1922         conjunctions.
1923         Preserve conditions for EXPLAIN.
1924       */
1925       if (conds && !(thd->lex->describe & DESCRIBE_EXTENDED))
1926       {
1927         COND *table_independent_conds=
1928           make_cond_for_table(thd, conds, PSEUDO_TABLE_BITS, 0, -1,
1929                               FALSE, FALSE);
1930         DBUG_EXECUTE("where",
1931                      print_where(table_independent_conds,
1932                                  "where after opt_sum_query()",
1933                                  QT_ORDINARY););
1934         conds= table_independent_conds;
1935       }
1936     }
1937   }
1938   if (!tables_list)
1939   {
1940     DBUG_PRINT("info",("No tables"));
1941     error= 0;
1942     subq_exit_fl= true;
1943     goto setup_subq_exit;
1944   }
1945   error= -1;					// Error is sent to client
1946   /* get_sort_by_table() call used to be here: */
1947   MEM_UNDEFINED(&sort_by_table, sizeof(sort_by_table));
1948 
1949   /*
1950     We have to remove constants and duplicates from group_list before
1951     calling make_join_statistics() as this may call get_best_group_min_max()
1952     which needs a simplfied group_list.
1953   */
1954   if (group_list && table_count == 1)
1955   {
1956     group_list= remove_const(this, group_list, conds,
1957                              rollup.state == ROLLUP::STATE_NONE,
1958                              &simple_group);
1959     if (unlikely(thd->is_error()))
1960     {
1961       error= 1;
1962       DBUG_RETURN(1);
1963     }
1964     if (!group_list)
1965     {
1966       /* The output has only one row */
1967       order=0;
1968       simple_order=1;
1969       group_optimized_away= 1;
1970       select_distinct=0;
1971     }
1972   }
1973 
1974   /* Calculate how to do the join */
1975   THD_STAGE_INFO(thd, stage_statistics);
1976   result->prepare_to_read_rows();
1977   if (unlikely(make_join_statistics(this, select_lex->leaf_tables,
1978                                     &keyuse)) ||
1979       unlikely(thd->is_fatal_error))
1980   {
1981     DBUG_PRINT("error",("Error: make_join_statistics() failed"));
1982     DBUG_RETURN(1);
1983   }
1984 
1985   /*
1986     If a splittable materialized derived/view dt_i is embedded into
1987     into another splittable materialized derived/view dt_o then
1988     splitting plans for dt_i and dt_o are evaluated independently.
1989     First the optimizer looks for the best splitting plan sp_i for dt_i.
1990     It happens when non-splitting plans for dt_o are evaluated.
1991     The cost of sp_i is considered as the cost of materialization of dt_i
1992     when evaluating any splitting plan for dt_o.
1993   */
1994   if (fix_all_splittings_in_plan())
1995     DBUG_RETURN(1);
1996 
1997 setup_subq_exit:
1998   with_two_phase_optimization= check_two_phase_optimization(thd);
1999   if (with_two_phase_optimization)
2000     optimization_state= JOIN::OPTIMIZATION_PHASE_1_DONE;
2001   else
2002   {
2003     if (optimize_stage2())
2004       DBUG_RETURN(1);
2005   }
2006   DBUG_RETURN(0);
2007 }
2008 
2009 
optimize_stage2()2010 int JOIN::optimize_stage2()
2011 {
2012   ulonglong select_opts_for_readinfo;
2013   uint no_jbuf_after;
2014   JOIN_TAB *tab;
2015   DBUG_ENTER("JOIN::optimize_stage2");
2016 
2017   if (subq_exit_fl)
2018     goto setup_subq_exit;
2019 
2020   if (unlikely(thd->check_killed()))
2021     DBUG_RETURN(1);
2022 
2023   /* Generate an execution plan from the found optimal join order. */
2024   if (get_best_combination())
2025     DBUG_RETURN(1);
2026 
2027   if (select_lex->handle_derived(thd->lex, DT_OPTIMIZE))
2028     DBUG_RETURN(1);
2029 
2030   if (optimizer_flag(thd, OPTIMIZER_SWITCH_DERIVED_WITH_KEYS))
2031     drop_unused_derived_keys();
2032 
2033   if (rollup.state != ROLLUP::STATE_NONE)
2034   {
2035     if (rollup_process_const_fields())
2036     {
2037       DBUG_PRINT("error", ("Error: rollup_process_fields() failed"));
2038       DBUG_RETURN(1);
2039     }
2040   }
2041   else
2042   {
2043     /* Remove distinct if only const tables */
2044     select_distinct= select_distinct && (const_tables != table_count);
2045   }
2046 
2047   THD_STAGE_INFO(thd, stage_preparing);
2048   if (result->initialize_tables(this))
2049   {
2050     DBUG_PRINT("error",("Error: initialize_tables() failed"));
2051     DBUG_RETURN(1);				// error == -1
2052   }
2053   if (const_table_map != found_const_table_map &&
2054       !(select_options & SELECT_DESCRIBE))
2055   {
2056     // There is at least one empty const table
2057     zero_result_cause= "no matching row in const table";
2058     DBUG_PRINT("error",("Error: %s", zero_result_cause));
2059     error= 0;
2060     handle_implicit_grouping_with_window_funcs();
2061     goto setup_subq_exit;
2062   }
2063   if (!(thd->variables.option_bits & OPTION_BIG_SELECTS) &&
2064       best_read > (double) thd->variables.max_join_size &&
2065       !(select_options & SELECT_DESCRIBE))
2066   {						/* purecov: inspected */
2067     my_message(ER_TOO_BIG_SELECT, ER_THD(thd, ER_TOO_BIG_SELECT), MYF(0));
2068     error= -1;
2069     DBUG_RETURN(1);
2070   }
2071   if (const_tables && !thd->locked_tables_mode &&
2072       !(select_options & SELECT_NO_UNLOCK))
2073   {
2074     /*
2075       Unlock all tables, except sequences, as accessing these may still
2076       require table updates
2077     */
2078     mysql_unlock_some_tables(thd, table, const_tables,
2079                              GET_LOCK_SKIP_SEQUENCES);
2080   }
2081   if (!conds && outer_join)
2082   {
2083     /* Handle the case where we have an OUTER JOIN without a WHERE */
2084     conds= new (thd->mem_root) Item_int(thd, (longlong) 1,1); // Always true
2085   }
2086 
2087   if (impossible_where)
2088   {
2089     zero_result_cause=
2090       "Impossible WHERE noticed after reading const tables";
2091     select_lex->mark_const_derived(zero_result_cause);
2092     handle_implicit_grouping_with_window_funcs();
2093     goto setup_subq_exit;
2094   }
2095 
2096   select= make_select(*table, const_table_map,
2097                       const_table_map, conds, (SORT_INFO*) 0, 1, &error);
2098   if (unlikely(error))
2099   {						/* purecov: inspected */
2100     error= -1;					/* purecov: inspected */
2101     DBUG_PRINT("error",("Error: make_select() failed"));
2102     DBUG_RETURN(1);
2103   }
2104 
2105   reset_nj_counters(this, join_list);
2106   if (make_outerjoin_info(this))
2107   {
2108     DBUG_RETURN(1);
2109   }
2110 
2111   /*
2112     Among the equal fields belonging to the same multiple equality
2113     choose the one that is to be retrieved first and substitute
2114     all references to these in where condition for a reference for
2115     the selected field.
2116   */
2117   if (conds)
2118   {
2119     conds= substitute_for_best_equal_field(thd, NO_PARTICULAR_TAB, conds,
2120                                            cond_equal, map2table);
2121     if (unlikely(thd->is_error()))
2122     {
2123       error= 1;
2124       DBUG_PRINT("error",("Error from substitute_for_best_equal"));
2125       DBUG_RETURN(1);
2126     }
2127     conds->update_used_tables();
2128     DBUG_EXECUTE("where",
2129                  print_where(conds,
2130                              "after substitute_best_equal",
2131                              QT_ORDINARY););
2132   }
2133 
2134   /*
2135     Perform the optimization on fields evaluation mentioned above
2136     for all on expressions.
2137   */
2138   for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); tab;
2139        tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
2140   {
2141     if (*tab->on_expr_ref)
2142     {
2143       *tab->on_expr_ref= substitute_for_best_equal_field(thd, NO_PARTICULAR_TAB,
2144                                                          *tab->on_expr_ref,
2145                                                          tab->cond_equal,
2146                                                          map2table);
2147       if (unlikely(thd->is_error()))
2148       {
2149         error= 1;
2150         DBUG_PRINT("error",("Error from substitute_for_best_equal"));
2151         DBUG_RETURN(1);
2152       }
2153       (*tab->on_expr_ref)->update_used_tables();
2154     }
2155   }
2156 
2157   /*
2158     Perform the optimization on fields evaliation mentioned above
2159     for all used ref items.
2160   */
2161   for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); tab;
2162        tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
2163   {
2164     uint key_copy_index=0;
2165     for (uint i=0; i < tab->ref.key_parts; i++)
2166     {
2167       Item **ref_item_ptr= tab->ref.items+i;
2168       Item *ref_item= *ref_item_ptr;
2169       if (!ref_item->used_tables() && !(select_options & SELECT_DESCRIBE))
2170         continue;
2171       COND_EQUAL *equals= cond_equal;
2172       JOIN_TAB *first_inner= tab->first_inner;
2173       while (equals)
2174       {
2175         ref_item= substitute_for_best_equal_field(thd, tab, ref_item,
2176                                                   equals, map2table);
2177         if (unlikely(thd->is_fatal_error))
2178           DBUG_RETURN(1);
2179 
2180         if (first_inner)
2181 	{
2182           equals= first_inner->cond_equal;
2183           first_inner= first_inner->first_upper;
2184         }
2185         else
2186           equals= 0;
2187       }
2188       ref_item->update_used_tables();
2189       if (*ref_item_ptr != ref_item)
2190       {
2191         *ref_item_ptr= ref_item;
2192         Item *item= ref_item->real_item();
2193         store_key *key_copy= tab->ref.key_copy[key_copy_index];
2194         if (key_copy->type() == store_key::FIELD_STORE_KEY)
2195         {
2196           if (item->basic_const_item())
2197           {
2198             /* It is constant propagated here */
2199             tab->ref.key_copy[key_copy_index]=
2200               new store_key_const_item(*tab->ref.key_copy[key_copy_index],
2201                                        item);
2202           }
2203           else if (item->const_item())
2204 	  {
2205             tab->ref.key_copy[key_copy_index]=
2206               new store_key_item(*tab->ref.key_copy[key_copy_index],
2207                                  item, TRUE);
2208           }
2209           else
2210           {
2211             store_key_field *field_copy= ((store_key_field *)key_copy);
2212             DBUG_ASSERT(item->type() == Item::FIELD_ITEM);
2213             field_copy->change_source_field((Item_field *) item);
2214           }
2215         }
2216       }
2217       key_copy_index++;
2218     }
2219   }
2220 
2221   if (conds && const_table_map != found_const_table_map &&
2222       (select_options & SELECT_DESCRIBE))
2223   {
2224     conds=new (thd->mem_root) Item_int(thd, (longlong) 0, 1); // Always false
2225   }
2226 
2227   /* Cache constant expressions in WHERE, HAVING, ON clauses. */
2228   cache_const_exprs();
2229 
2230   if (setup_semijoin_loosescan(this))
2231     DBUG_RETURN(1);
2232 
2233   if (make_join_select(this, select, conds))
2234   {
2235     zero_result_cause=
2236       "Impossible WHERE noticed after reading const tables";
2237     select_lex->mark_const_derived(zero_result_cause);
2238     handle_implicit_grouping_with_window_funcs();
2239     goto setup_subq_exit;
2240   }
2241 
2242   error= -1;					/* if goto err */
2243 
2244   /* Optimize distinct away if possible */
2245   {
2246     ORDER *org_order= order;
2247     order=remove_const(this, order,conds,1, &simple_order);
2248     if (unlikely(thd->is_error()))
2249     {
2250       error= 1;
2251       DBUG_RETURN(1);
2252     }
2253 
2254     /*
2255       If we are using ORDER BY NULL or ORDER BY const_expression,
2256       return result in any order (even if we are using a GROUP BY)
2257     */
2258     if (!order && org_order)
2259       skip_sort_order= 1;
2260   }
2261   /*
2262      Check if we can optimize away GROUP BY/DISTINCT.
2263      We can do that if there are no aggregate functions, the
2264      fields in DISTINCT clause (if present) and/or columns in GROUP BY
2265      (if present) contain direct references to all key parts of
2266      an unique index (in whatever order) and if the key parts of the
2267      unique index cannot contain NULLs.
2268      Note that the unique keys for DISTINCT and GROUP BY should not
2269      be the same (as long as they are unique).
2270 
2271      The FROM clause must contain a single non-constant table.
2272   */
2273   if (table_count - const_tables == 1 && (group || select_distinct) &&
2274       !tmp_table_param.sum_func_count &&
2275       (!join_tab[const_tables].select ||
2276        !join_tab[const_tables].select->quick ||
2277        join_tab[const_tables].select->quick->get_type() !=
2278        QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX) &&
2279       !select_lex->have_window_funcs())
2280   {
2281     if (group && rollup.state == ROLLUP::STATE_NONE &&
2282        list_contains_unique_index(join_tab[const_tables].table,
2283                                  find_field_in_order_list,
2284                                  (void *) group_list))
2285     {
2286       /*
2287         We have found that grouping can be removed since groups correspond to
2288         only one row anyway, but we still have to guarantee correct result
2289         order. The line below effectively rewrites the query from GROUP BY
2290         <fields> to ORDER BY <fields>. There are three exceptions:
2291         - if skip_sort_order is set (see above), then we can simply skip
2292           GROUP BY;
2293         - if we are in a subquery, we don't have to maintain order unless there
2294 	  is a limit clause in the subquery.
2295         - we can only rewrite ORDER BY if the ORDER BY fields are 'compatible'
2296           with the GROUP BY ones, i.e. either one is a prefix of another.
2297           We only check if the ORDER BY is a prefix of GROUP BY. In this case
2298           test_if_subpart() copies the ASC/DESC attributes from the original
2299           ORDER BY fields.
2300           If GROUP BY is a prefix of ORDER BY, then it is safe to leave
2301           'order' as is.
2302        */
2303       if (!order || test_if_subpart(group_list, order))
2304       {
2305         if (skip_sort_order ||
2306             (select_lex->master_unit()->item && select_limit == HA_POS_ERROR)) // This is a subquery
2307           order= NULL;
2308         else
2309           order= group_list;
2310       }
2311       /*
2312         If we have an IGNORE INDEX FOR GROUP BY(fields) clause, this must be
2313         rewritten to IGNORE INDEX FOR ORDER BY(fields).
2314       */
2315       join_tab->table->keys_in_use_for_order_by=
2316         join_tab->table->keys_in_use_for_group_by;
2317       group_list= 0;
2318       group= 0;
2319     }
2320     if (select_distinct &&
2321        list_contains_unique_index(join_tab[const_tables].table,
2322                                  find_field_in_item_list,
2323                                  (void *) &fields_list))
2324     {
2325       select_distinct= 0;
2326     }
2327   }
2328   if (group || tmp_table_param.sum_func_count)
2329   {
2330     if (! hidden_group_fields && rollup.state == ROLLUP::STATE_NONE
2331         && !select_lex->have_window_funcs())
2332       select_distinct=0;
2333   }
2334   else if (select_distinct && table_count - const_tables == 1 &&
2335            rollup.state == ROLLUP::STATE_NONE &&
2336            !select_lex->have_window_funcs())
2337   {
2338     /*
2339       We are only using one table. In this case we change DISTINCT to a
2340       GROUP BY query if:
2341       - The GROUP BY can be done through indexes (no sort) and the ORDER
2342         BY only uses selected fields.
2343 	(In this case we can later optimize away GROUP BY and ORDER BY)
2344       - We are scanning the whole table without LIMIT
2345         This can happen if:
2346         - We are using CALC_FOUND_ROWS
2347         - We are using an ORDER BY that can't be optimized away.
2348 
2349       We don't want to use this optimization when we are using LIMIT
2350       because in this case we can just create a temporary table that
2351       holds LIMIT rows and stop when this table is full.
2352     */
2353     bool all_order_fields_used;
2354 
2355     tab= &join_tab[const_tables];
2356     if (order)
2357     {
2358       skip_sort_order=
2359         test_if_skip_sort_order(tab, order, select_limit,
2360                                 true,           // no_changes
2361                                 &tab->table->keys_in_use_for_order_by);
2362     }
2363     if ((group_list=create_distinct_group(thd, select_lex->ref_pointer_array,
2364                                           order, fields_list, all_fields,
2365 				          &all_order_fields_used)))
2366     {
2367       const bool skip_group=
2368         skip_sort_order &&
2369         test_if_skip_sort_order(tab, group_list, select_limit,
2370                                   true,         // no_changes
2371                                   &tab->table->keys_in_use_for_group_by);
2372       count_field_types(select_lex, &tmp_table_param, all_fields, 0);
2373       if ((skip_group && all_order_fields_used) ||
2374 	  select_limit == HA_POS_ERROR ||
2375 	  (order && !skip_sort_order))
2376       {
2377 	/*  Change DISTINCT to GROUP BY */
2378 	select_distinct= 0;
2379 	no_order= !order;
2380 	if (all_order_fields_used)
2381 	{
2382 	  if (order && skip_sort_order)
2383 	  {
2384 	    /*
2385 	      Force MySQL to read the table in sorted order to get result in
2386 	      ORDER BY order.
2387 	    */
2388 	    tmp_table_param.quick_group=0;
2389 	  }
2390 	  order=0;
2391         }
2392 	group=1;				// For end_write_group
2393       }
2394       else
2395 	group_list= 0;
2396     }
2397     else if (thd->is_fatal_error)			// End of memory
2398       DBUG_RETURN(1);
2399   }
2400   simple_group= rollup.state == ROLLUP::STATE_NONE;
2401   if (group)
2402   {
2403     /*
2404       Update simple_group and group_list as we now have more information, like
2405       which tables or columns are constant.
2406     */
2407     group_list= remove_const(this, group_list, conds,
2408                              rollup.state == ROLLUP::STATE_NONE,
2409                              &simple_group);
2410     if (unlikely(thd->is_error()))
2411     {
2412       error= 1;
2413       DBUG_RETURN(1);
2414     }
2415     if (!group_list)
2416     {
2417       /* The output has only one row */
2418       order=0;
2419       simple_order=1;
2420       select_distinct= 0;
2421       group_optimized_away= 1;
2422     }
2423   }
2424 
2425   calc_group_buffer(this, group_list);
2426   send_group_parts= tmp_table_param.group_parts; /* Save org parts */
2427   if (procedure && procedure->group)
2428   {
2429     group_list= procedure->group= remove_const(this, procedure->group, conds,
2430 					       1, &simple_group);
2431     if (unlikely(thd->is_error()))
2432     {
2433       error= 1;
2434       DBUG_RETURN(1);
2435     }
2436     calc_group_buffer(this, group_list);
2437   }
2438 
2439   if (test_if_subpart(group_list, order) ||
2440       (!group_list && tmp_table_param.sum_func_count))
2441   {
2442     order=0;
2443     if (is_indexed_agg_distinct(this, NULL))
2444       sort_and_group= 0;
2445   }
2446 
2447   // Can't use sort on head table if using join buffering
2448   if (full_join || hash_join)
2449   {
2450     TABLE *stable= (sort_by_table == (TABLE *) 1 ?
2451       join_tab[const_tables].table : sort_by_table);
2452     /*
2453       FORCE INDEX FOR ORDER BY can be used to prevent join buffering when
2454       sorting on the first table.
2455     */
2456     if (!stable || (!stable->force_index_order &&
2457                     !map2table[stable->tablenr]->keep_current_rowid))
2458     {
2459       if (group_list)
2460         simple_group= 0;
2461       if (order)
2462         simple_order= 0;
2463     }
2464   }
2465 
2466   need_tmp= test_if_need_tmp_table();
2467 
2468   /*
2469     If window functions are present then we can't have simple_order set to
2470     TRUE as the window function needs a temp table for computation.
2471     ORDER BY is computed after the window function computation is done, so
2472     the sort will be done on the temp table.
2473   */
2474   if (select_lex->have_window_funcs())
2475     simple_order= FALSE;
2476 
2477 
2478   /*
2479     If the hint FORCE INDEX FOR ORDER BY/GROUP BY is used for the table
2480     whose columns are required to be returned in a sorted order, then
2481     the proper value for no_jbuf_after should be yielded by a call to
2482     the make_join_orderinfo function.
2483     Yet the current implementation of FORCE INDEX hints does not
2484     allow us to do it in a clean manner.
2485   */
2486   no_jbuf_after= 1 ? table_count : make_join_orderinfo(this);
2487 
2488   // Don't use join buffering when we use MATCH
2489   select_opts_for_readinfo=
2490     (select_options & (SELECT_DESCRIBE | SELECT_NO_JOIN_CACHE)) |
2491     (select_lex->ftfunc_list->elements ?  SELECT_NO_JOIN_CACHE : 0);
2492 
2493   if (select_lex->options & OPTION_SCHEMA_TABLE &&
2494        optimize_schema_tables_reads(this))
2495     DBUG_RETURN(1);
2496 
2497   if (make_join_readinfo(this, select_opts_for_readinfo, no_jbuf_after))
2498     DBUG_RETURN(1);
2499 
2500   /* Perform FULLTEXT search before all regular searches */
2501   if (!(select_options & SELECT_DESCRIBE))
2502     if (init_ftfuncs(thd, select_lex, MY_TEST(order)))
2503       DBUG_RETURN(1);
2504 
  /*
    It's necessary to check the const part of the HAVING cond as
    there is a chance that some cond parts may become
    const items after make_join_statistics (for example
    when an Item is a reference to a const table field from
    an outer join).
    This check is performed only for those conditions
    which do not use aggregate functions. In such a case a
    temporary table may not be used and const condition
    elements may be lost during further having
    condition transformation in JOIN::exec.
  */
2517   if (having && const_table_map && !having->with_sum_func)
2518   {
2519     having->update_used_tables();
2520     having= having->remove_eq_conds(thd, &select_lex->having_value, true);
2521     if (select_lex->having_value == Item::COND_FALSE)
2522     {
2523       having= new (thd->mem_root) Item_int(thd, (longlong) 0,1);
2524       zero_result_cause= "Impossible HAVING noticed after reading const tables";
2525       error= 0;
2526       select_lex->mark_const_derived(zero_result_cause);
2527       goto setup_subq_exit;
2528     }
2529   }
2530 
2531   if (optimize_unflattened_subqueries())
2532     DBUG_RETURN(1);
2533 
2534   int res;
2535   if ((res= rewrite_to_index_subquery_engine(this)) != -1)
2536     DBUG_RETURN(res);
2537   if (setup_subquery_caches())
2538     DBUG_RETURN(-1);
2539 
2540   /*
2541     Need to tell handlers that to play it safe, it should fetch all
2542     columns of the primary key of the tables: this is because MySQL may
2543     build row pointers for the rows, and for all columns of the primary key
2544     the read set has not necessarily been set by the server code.
2545   */
2546   if (need_tmp || select_distinct || group_list || order)
2547   {
2548     for (uint i= 0; i < table_count; i++)
2549     {
2550       if (!(table[i]->map & const_table_map))
2551         table[i]->prepare_for_position();
2552     }
2553   }
2554 
2555   DBUG_EXECUTE("info",TEST_join(this););
2556 
2557   if (!only_const_tables())
2558   {
2559      JOIN_TAB *tab= &join_tab[const_tables];
2560 
2561     if (order)
2562     {
2563       /*
2564         Force using of tmp table if sorting by a SP or UDF function due to
2565         their expensive and probably non-deterministic nature.
2566       */
2567       for (ORDER *tmp_order= order; tmp_order ; tmp_order=tmp_order->next)
2568       {
2569         Item *item= *tmp_order->item;
2570         if (item->is_expensive())
2571         {
2572           /* Force tmp table without sort */
2573           need_tmp=1; simple_order=simple_group=0;
2574           break;
2575         }
2576       }
2577     }
2578 
2579     /*
2580       Because filesort always does a full table scan or a quick range scan
2581       we must add the removed reference to the select for the table.
2582       We only need to do this when we have a simple_order or simple_group
2583       as in other cases the join is done before the sort.
2584     */
2585     if ((order || group_list) &&
2586         tab->type != JT_ALL &&
2587         tab->type != JT_FT &&
2588         tab->type != JT_REF_OR_NULL &&
2589         ((order && simple_order) || (group_list && simple_group)))
2590     {
2591       if (add_ref_to_table_cond(thd,tab)) {
2592         DBUG_RETURN(1);
2593       }
2594     }
2595     /*
2596       Investigate whether we may use an ordered index as part of either
2597       DISTINCT, GROUP BY or ORDER BY execution. An ordered index may be
2598       used for only the first of any of these terms to be executed. This
2599       is reflected in the order which we check for test_if_skip_sort_order()
2600       below. However we do not check for DISTINCT here, as it would have
2601       been transformed to a GROUP BY at this stage if it is a candidate for
2602       ordered index optimization.
2603       If a decision was made to use an ordered index, the availability
2604       of such an access path is stored in 'ordered_index_usage' for later
2605       use by 'execute' or 'explain'
2606     */
2607     DBUG_ASSERT(ordered_index_usage == ordered_index_void);
2608 
2609     if (group_list)   // GROUP BY honoured first
2610                       // (DISTINCT was rewritten to GROUP BY if skippable)
2611     {
2612       /*
2613         When there is SQL_BIG_RESULT do not sort using index for GROUP BY,
2614         and thus force sorting on disk unless a group min-max optimization
2615         is going to be used as it is applied now only for one table queries
2616         with covering indexes.
2617       */
2618       if (!(select_options & SELECT_BIG_RESULT) ||
2619             (tab->select &&
2620              tab->select->quick &&
2621              tab->select->quick->get_type() ==
2622              QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX))
2623       {
2624         if (simple_group &&              // GROUP BY is possibly skippable
2625             !select_distinct)            // .. if not preceded by a DISTINCT
2626         {
2627           /*
2628             Calculate a possible 'limit' of table rows for 'GROUP BY':
2629             A specified 'LIMIT' is relative to the final resultset.
2630             'need_tmp' implies that there will be more postprocessing
2631             so the specified 'limit' should not be enforced yet.
2632            */
2633           const ha_rows limit = need_tmp ? HA_POS_ERROR : select_limit;
2634           if (test_if_skip_sort_order(tab, group_list, limit, false,
2635                                       &tab->table->keys_in_use_for_group_by))
2636           {
2637             ordered_index_usage= ordered_index_group_by;
2638           }
2639         }
2640 
2641 	/*
2642 	  If we are going to use semi-join LooseScan, it will depend
2643 	  on the selected index scan to be used.  If index is not used
2644 	  for the GROUP BY, we risk that sorting is put on the LooseScan
2645 	  table.  In order to avoid this, force use of temporary table.
2646 	  TODO: Explain the quick_group part of the test below.
2647 	 */
2648         if ((ordered_index_usage != ordered_index_group_by) &&
2649             ((tmp_table_param.quick_group && !procedure) ||
2650 	     (tab->emb_sj_nest &&
2651 	      best_positions[const_tables].sj_strategy == SJ_OPT_LOOSE_SCAN)))
2652         {
2653           need_tmp=1;
2654           simple_order= simple_group= false; // Force tmp table without sort
2655         }
2656       }
2657     }
2658     else if (order &&                      // ORDER BY wo/ preceding GROUP BY
2659              (simple_order || skip_sort_order)) // which is possibly skippable
2660     {
2661       if (test_if_skip_sort_order(tab, order, select_limit, false,
2662                                   &tab->table->keys_in_use_for_order_by))
2663       {
2664         ordered_index_usage= ordered_index_order_by;
2665       }
2666     }
2667   }
2668 
2669   if (having)
2670     having_is_correlated= MY_TEST(having->used_tables() & OUTER_REF_TABLE_BIT);
2671   tmp_having= having;
2672 
2673   if (unlikely(thd->is_error()))
2674     DBUG_RETURN(TRUE);
2675 
2676   /*
2677     The loose index scan access method guarantees that all grouping or
2678     duplicate row elimination (for distinct) is already performed
2679     during data retrieval, and that all MIN/MAX functions are already
2680     computed for each group. Thus all MIN/MAX functions should be
2681     treated as regular functions, and there is no need to perform
2682     grouping in the main execution loop.
2683     Notice that currently loose index scan is applicable only for
2684     single table queries, thus it is sufficient to test only the first
2685     join_tab element of the plan for its access method.
2686   */
2687   if (join_tab->is_using_loose_index_scan())
2688   {
2689     tmp_table_param.precomputed_group_by= TRUE;
2690     if (join_tab->is_using_agg_loose_index_scan())
2691     {
2692       need_distinct= FALSE;
2693       tmp_table_param.precomputed_group_by= FALSE;
2694     }
2695   }
2696 
2697   if (make_aggr_tables_info())
2698     DBUG_RETURN(1);
2699 
2700   if (init_join_caches())
2701     DBUG_RETURN(1);
2702 
2703   error= 0;
2704 
2705   if (select_options & SELECT_DESCRIBE)
2706     goto derived_exit;
2707 
2708   DBUG_RETURN(0);
2709 
2710 setup_subq_exit:
2711   /* Choose an execution strategy for this JOIN. */
2712   if (!tables_list || !table_count)
2713   {
2714     choose_tableless_subquery_plan();
2715 
2716     /* The output has atmost one row */
2717     if (group_list)
2718     {
2719       group_list= NULL;
2720       group_optimized_away= 1;
2721       rollup.state= ROLLUP::STATE_NONE;
2722     }
2723     order= NULL;
2724     simple_order= TRUE;
2725     select_distinct= FALSE;
2726 
2727     if (select_lex->have_window_funcs())
2728     {
2729       if (!(join_tab= (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB))))
2730         DBUG_RETURN(1);
2731       need_tmp= 1;
2732     }
2733     if (make_aggr_tables_info())
2734       DBUG_RETURN(1);
2735 
2736     /*
2737       It could be that we've only done optimization stage 1 for
2738       some of the derived tables, and never did stage 2.
2739       Do it now, otherwise Explain data structure will not be complete.
2740     */
2741     if (select_lex->handle_derived(thd->lex, DT_OPTIMIZE))
2742       DBUG_RETURN(1);
2743   }
2744   /*
2745     Even with zero matching rows, subqueries in the HAVING clause may
2746     need to be evaluated if there are aggregate functions in the query.
2747   */
2748   if (optimize_unflattened_subqueries())
2749     DBUG_RETURN(1);
2750   error= 0;
2751 
2752 derived_exit:
2753 
2754   select_lex->mark_const_derived(zero_result_cause);
2755   DBUG_RETURN(0);
2756 }
2757 
2758 /**
2759   Add having condition as a where clause condition of the given temp table.
2760 
2761   @param    tab   Table to which having condition is added.
2762 
2763   @returns  false if success, true if error.
2764 */
2765 
add_having_as_table_cond(JOIN_TAB * tab)2766 bool JOIN::add_having_as_table_cond(JOIN_TAB *tab)
2767 {
2768   tmp_having->update_used_tables();
2769   table_map used_tables= tab->table->map | OUTER_REF_TABLE_BIT;
2770 
2771   /* If tmp table is not used then consider conditions of const table also */
2772   if (!need_tmp)
2773     used_tables|= const_table_map;
2774 
2775   DBUG_ENTER("JOIN::add_having_as_table_cond");
2776 
2777   Item* sort_table_cond= make_cond_for_table(thd, tmp_having, used_tables,
2778                                              (table_map) 0, false,
2779                                              false, false);
2780   if (sort_table_cond)
2781   {
2782     if (!tab->select)
2783     {
2784       if (!(tab->select= new SQL_SELECT))
2785         DBUG_RETURN(true);
2786       tab->select->head= tab->table;
2787     }
2788     if (!tab->select->cond)
2789       tab->select->cond= sort_table_cond;
2790     else
2791     {
2792       if (!(tab->select->cond=
2793 	      new (thd->mem_root) Item_cond_and(thd,
2794                                                 tab->select->cond,
2795                                                 sort_table_cond)))
2796         DBUG_RETURN(true);
2797     }
2798     if (tab->pre_idx_push_select_cond)
2799     {
2800       if (sort_table_cond->type() == Item::COND_ITEM)
2801         sort_table_cond= sort_table_cond->copy_andor_structure(thd);
2802       if (!(tab->pre_idx_push_select_cond=
2803               new (thd->mem_root) Item_cond_and(thd,
2804                                                 tab->pre_idx_push_select_cond,
2805                                                 sort_table_cond)))
2806         DBUG_RETURN(true);
2807     }
2808     if (tab->select->cond)
2809       tab->select->cond->fix_fields_if_needed(thd, 0);
2810     if (tab->pre_idx_push_select_cond)
2811       tab->pre_idx_push_select_cond->fix_fields_if_needed(thd, 0);
2812     tab->select->pre_idx_push_select_cond= tab->pre_idx_push_select_cond;
2813     tab->set_select_cond(tab->select->cond, __LINE__);
2814     tab->select_cond->top_level_item();
2815     DBUG_EXECUTE("where",print_where(tab->select->cond,
2816 				     "select and having",
2817                                      QT_ORDINARY););
2818 
2819     having= make_cond_for_table(thd, tmp_having, ~ (table_map) 0,
2820                                 ~used_tables, false, false, false);
2821     DBUG_EXECUTE("where",
2822                  print_where(having, "having after sort", QT_ORDINARY););
2823   }
2824 
2825   DBUG_RETURN(false);
2826 }
2827 
2828 
add_fields_for_current_rowid(JOIN_TAB * cur,List<Item> * table_fields)2829 bool JOIN::add_fields_for_current_rowid(JOIN_TAB *cur, List<Item> *table_fields)
2830 {
2831   /*
2832     this will not walk into semi-join materialization nests but this is ok
2833     because we will never need to save current rowids for those.
2834   */
2835   for (JOIN_TAB *tab=join_tab; tab < cur; tab++)
2836   {
2837     if (!tab->keep_current_rowid)
2838       continue;
2839     Item *item= new (thd->mem_root) Item_temptable_rowid(tab->table);
2840     item->fix_fields(thd, 0);
2841     table_fields->push_back(item, thd->mem_root);
2842     cur->tmp_table_param->func_count++;
2843   }
2844   return 0;
2845 }
2846 
2847 
/**
  Set info for aggregation tables

  @details
  This function finalizes the execution plan by taking the following actions:
    .) aggregation temporary tables are created, but not instantiated
       (this is done during execution).
       JOIN_TABs for aggregation tables are set appropriately
       (see JOIN::create_postjoin_aggr_table).
    .) field lists (fields, all_fields, ref_pointer_array slices) are
       prepared for each required stage of execution. These field lists are
       set for the working tables' tabs and for the tab of the last table in
       the join.
    .) info for sorting/grouping/dups removal is prepared and saved in
       the appropriate tabs.

  @returns
  false - Ok
  true  - Error
*/
2867 
make_aggr_tables_info()2868 bool JOIN::make_aggr_tables_info()
2869 {
2870   List<Item> *curr_all_fields= &all_fields;
2871   List<Item> *curr_fields_list= &fields_list;
2872   JOIN_TAB *curr_tab= join_tab + const_tables;
2873   TABLE *exec_tmp_table= NULL;
2874   bool distinct= false;
2875   bool keep_row_order= false;
2876   bool is_having_added_as_table_cond= false;
2877   DBUG_ENTER("JOIN::make_aggr_tables_info");
2878 
2879   const bool has_group_by= this->group;
2880 
2881   sort_and_group_aggr_tab= NULL;
2882 
2883   if (group_optimized_away)
2884     implicit_grouping= true;
2885 
2886   bool implicit_grouping_with_window_funcs= implicit_grouping &&
2887                                             select_lex->have_window_funcs();
2888   bool implicit_grouping_without_tables= implicit_grouping &&
2889                                          !tables_list;
2890 
2891   /*
2892     Setup last table to provide fields and all_fields lists to the next
2893     node in the plan.
2894   */
2895   if (join_tab && top_join_tab_count && tables_list)
2896   {
2897     join_tab[top_join_tab_count - 1].fields= &fields_list;
2898     join_tab[top_join_tab_count - 1].all_fields= &all_fields;
2899   }
2900 
2901   /*
2902     All optimization is done. Check if we can use the storage engines
2903     group by handler to evaluate the group by.
2904     Some storage engines, like spider can also do joins, group by and
2905     distinct in the engine, so we do this for all queries, not only
2906     GROUP BY queries.
2907   */
2908   if (tables_list && top_join_tab_count && !procedure)
2909   {
2910     /*
2911       At the moment we only support push down for queries where
2912       all tables are in the same storage engine
2913     */
2914     TABLE_LIST *tbl= tables_list;
2915     handlerton *ht= tbl && tbl->table ? tbl->table->file->partition_ht() : 0;
2916     for (tbl= tbl->next_local; ht && tbl; tbl= tbl->next_local)
2917     {
2918       if (!tbl->table || tbl->table->file->partition_ht() != ht)
2919         ht= 0;
2920     }
2921 
2922     if (ht && ht->create_group_by)
2923     {
2924       /* Check if the storage engine can intercept the query */
2925       Query query= {&all_fields, select_distinct, tables_list, conds,
2926                     group_list, order ? order : group_list, having};
2927       group_by_handler *gbh= ht->create_group_by(thd, &query);
2928 
2929       if (gbh)
2930       {
2931         if (!(pushdown_query= new (thd->mem_root) Pushdown_query(select_lex, gbh)))
2932           DBUG_RETURN(1);
2933         /*
2934           We must store rows in the tmp table if we need to do an ORDER BY
2935           or DISTINCT and the storage handler can't handle it.
2936         */
2937         need_tmp= query.order_by || query.group_by || query.distinct;
2938         distinct= query.distinct;
2939         keep_row_order= query.order_by || query.group_by;
2940 
2941         order= query.order_by;
2942 
2943         aggr_tables++;
2944         curr_tab= join_tab + exec_join_tab_cnt();
2945         bzero((void*)curr_tab, sizeof(JOIN_TAB));
2946         curr_tab->ref.key= -1;
2947         curr_tab->join= this;
2948 
2949         if (!(curr_tab->tmp_table_param= new TMP_TABLE_PARAM(tmp_table_param)))
2950           DBUG_RETURN(1);
2951         TABLE* table= create_tmp_table(thd, curr_tab->tmp_table_param,
2952                                        all_fields,
2953                                        NULL, query.distinct,
2954                                        TRUE, select_options, HA_POS_ERROR,
2955                                        &empty_clex_str, !need_tmp,
2956                                        query.order_by || query.group_by);
2957         if (!table)
2958           DBUG_RETURN(1);
2959 
2960         if (!(curr_tab->aggr= new (thd->mem_root) AGGR_OP(curr_tab)))
2961           DBUG_RETURN(1);
2962         curr_tab->aggr->set_write_func(::end_send);
2963         curr_tab->table= table;
2964         /*
2965           Setup reference fields, used by summary functions and group by fields,
2966           to point to the temporary table.
2967           The actual switching to the temporary tables fields for HAVING
2968           and ORDER BY is done in do_select() by calling
2969           set_items_ref_array(items1).
2970         */
2971         init_items_ref_array();
2972         items1= ref_ptr_array_slice(2);
2973         //items1= items0 + all_fields.elements;
2974         if (change_to_use_tmp_fields(thd, items1,
2975                                      tmp_fields_list1, tmp_all_fields1,
2976                                      fields_list.elements, all_fields))
2977           DBUG_RETURN(1);
2978 
2979         /* Give storage engine access to temporary table */
2980         gbh->table= table;
2981         pushdown_query->store_data_in_temp_table= need_tmp;
2982         pushdown_query->having= having;
2983 
2984         /*
2985           Group by and having is calculated by the group_by handler.
2986           Reset the group by and having
2987         */
2988         DBUG_ASSERT(query.group_by == NULL);
2989         group= 0; group_list= 0;
2990         having= tmp_having= 0;
2991         /*
2992           Select distinct is handled by handler or by creating an unique index
2993           over all fields in the temporary table
2994         */
2995         select_distinct= 0;
2996         order= query.order_by;
2997         tmp_table_param.field_count+= tmp_table_param.sum_func_count;
2998         tmp_table_param.sum_func_count= 0;
2999 
3000         fields= curr_fields_list;
3001 
3002         //todo: new:
3003         curr_tab->ref_array= &items1;
3004         curr_tab->all_fields= &tmp_all_fields1;
3005         curr_tab->fields= &tmp_fields_list1;
3006 
3007         DBUG_RETURN(thd->is_fatal_error);
3008       }
3009     }
3010   }
3011 
3012 
3013   /*
3014     The loose index scan access method guarantees that all grouping or
3015     duplicate row elimination (for distinct) is already performed
3016     during data retrieval, and that all MIN/MAX functions are already
3017     computed for each group. Thus all MIN/MAX functions should be
3018     treated as regular functions, and there is no need to perform
3019     grouping in the main execution loop.
3020     Notice that currently loose index scan is applicable only for
3021     single table queries, thus it is sufficient to test only the first
3022     join_tab element of the plan for its access method.
3023   */
3024   if (join_tab && top_join_tab_count && tables_list &&
3025       join_tab->is_using_loose_index_scan())
3026     tmp_table_param.precomputed_group_by=
3027       !join_tab->is_using_agg_loose_index_scan();
3028 
3029   group_list_for_estimates= group_list;
3030   /* Create a tmp table if distinct or if the sort is too complicated */
3031   if (need_tmp)
3032   {
3033     aggr_tables++;
3034     curr_tab= join_tab + exec_join_tab_cnt();
3035     bzero((void*)curr_tab, sizeof(JOIN_TAB));
3036     curr_tab->ref.key= -1;
3037     if (only_const_tables())
3038       first_select= sub_select_postjoin_aggr;
3039 
3040     /*
3041       Create temporary table on first execution of this join.
3042       (Will be reused if this is a subquery that is executed several times.)
3043     */
3044     init_items_ref_array();
3045 
3046     ORDER *tmp_group= (ORDER *) 0;
3047     if (!simple_group && !procedure && !(test_flags & TEST_NO_KEY_GROUP))
3048       tmp_group= group_list;
3049 
3050     tmp_table_param.hidden_field_count=
3051       all_fields.elements - fields_list.elements;
3052 
3053     distinct= select_distinct && !group_list &&
3054               !select_lex->have_window_funcs();
3055     keep_row_order= false;
3056     bool save_sum_fields= (group_list && simple_group) ||
3057                            implicit_grouping_with_window_funcs;
3058     if (create_postjoin_aggr_table(curr_tab,
3059                                    &all_fields, tmp_group,
3060                                    save_sum_fields,
3061                                    distinct, keep_row_order))
3062       DBUG_RETURN(true);
3063     exec_tmp_table= curr_tab->table;
3064 
3065     if (exec_tmp_table->distinct)
3066       optimize_distinct();
3067 
3068    /* Change sum_fields reference to calculated fields in tmp_table */
3069     items1= ref_ptr_array_slice(2);
3070     if ((sort_and_group || curr_tab->table->group ||
3071          tmp_table_param.precomputed_group_by) &&
3072          !implicit_grouping_without_tables)
3073     {
3074       if (change_to_use_tmp_fields(thd, items1,
3075                                    tmp_fields_list1, tmp_all_fields1,
3076                                    fields_list.elements, all_fields))
3077         DBUG_RETURN(true);
3078     }
3079     else
3080     {
3081       if (change_refs_to_tmp_fields(thd, items1,
3082                                     tmp_fields_list1, tmp_all_fields1,
3083                                     fields_list.elements, all_fields))
3084         DBUG_RETURN(true);
3085     }
3086     curr_all_fields= &tmp_all_fields1;
3087     curr_fields_list= &tmp_fields_list1;
3088     // Need to set them now for correct group_fields setup, reset at the end.
3089     set_items_ref_array(items1);
3090     curr_tab->ref_array= &items1;
3091     curr_tab->all_fields= &tmp_all_fields1;
3092     curr_tab->fields= &tmp_fields_list1;
3093     set_postjoin_aggr_write_func(curr_tab);
3094 
3095     /*
3096       If having is not handled here, it will be checked before the row is sent
3097       to the client.
3098     */
3099     if (tmp_having &&
3100         (sort_and_group || (exec_tmp_table->distinct && !group_list) ||
3101 	 select_lex->have_window_funcs()))
3102     {
3103       /*
3104         If there is no select distinct and there are no window functions
3105         then move the having to table conds of tmp table.
3106         NOTE : We cannot apply having after distinct or window functions
3107                If columns of having are not part of select distinct,
3108                then distinct may remove rows which can satisfy having.
3109                In the case of window functions we *must* make sure to not
3110                store any rows which don't match HAVING within the temp table,
3111                as rows will end up being used during their computation.
3112       */
3113       if (!select_distinct && !select_lex->have_window_funcs() &&
3114           add_having_as_table_cond(curr_tab))
3115         DBUG_RETURN(true);
3116       is_having_added_as_table_cond= tmp_having != having;
3117 
3118       /*
3119         Having condition which we are not able to add as tmp table conds are
3120         kept as before. And, this will be applied before storing the rows in
3121         tmp table.
3122       */
3123       curr_tab->having= having;
3124       having= NULL; // Already done
3125     }
3126 
3127     tmp_table_param.func_count= 0;
3128     tmp_table_param.field_count+= tmp_table_param.func_count;
3129     if (sort_and_group || curr_tab->table->group)
3130     {
3131       tmp_table_param.field_count+= tmp_table_param.sum_func_count;
3132       tmp_table_param.sum_func_count= 0;
3133     }
3134 
3135     if (exec_tmp_table->group)
3136     {						// Already grouped
3137       if (!order && !no_order && !skip_sort_order)
3138         order= group_list;  /* order by group */
3139       group_list= NULL;
3140     }
3141 
3142     /*
3143       If we have different sort & group then we must sort the data by group
3144       and copy it to another tmp table
3145       This code is also used if we are using distinct something
3146       we haven't been able to store in the temporary table yet
3147       like SEC_TO_TIME(SUM(...)).
3148     */
3149     if ((group_list &&
3150          (!test_if_subpart(group_list, order) || select_distinct)) ||
3151         (select_distinct && tmp_table_param.using_outer_summary_function))
3152     {					/* Must copy to another table */
3153       DBUG_PRINT("info",("Creating group table"));
3154 
3155       calc_group_buffer(this, group_list);
3156       count_field_types(select_lex, &tmp_table_param, tmp_all_fields1,
3157                         select_distinct && !group_list);
3158       tmp_table_param.hidden_field_count=
3159         tmp_all_fields1.elements - tmp_fields_list1.elements;
3160 
3161       curr_tab++;
3162       aggr_tables++;
3163       bzero((void*)curr_tab, sizeof(JOIN_TAB));
3164       curr_tab->ref.key= -1;
3165 
3166       /* group data to new table */
3167       /*
3168         If the access method is loose index scan then all MIN/MAX
3169         functions are precomputed, and should be treated as regular
3170         functions. See extended comment above.
3171       */
3172       if (join_tab->is_using_loose_index_scan())
3173         tmp_table_param.precomputed_group_by= TRUE;
3174 
3175       tmp_table_param.hidden_field_count=
3176         curr_all_fields->elements - curr_fields_list->elements;
3177       ORDER *dummy= NULL; //TODO can use table->group here also
3178 
3179       if (create_postjoin_aggr_table(curr_tab, curr_all_fields, dummy, true,
3180                                      distinct, keep_row_order))
3181 	DBUG_RETURN(true);
3182 
3183       if (group_list)
3184       {
3185         if (!only_const_tables())        // No need to sort a single row
3186         {
3187           if (add_sorting_to_table(curr_tab - 1, group_list))
3188             DBUG_RETURN(true);
3189         }
3190 
3191         if (make_group_fields(this, this))
3192           DBUG_RETURN(true);
3193       }
3194 
3195       // Setup sum funcs only when necessary, otherwise we might break info
3196       // for the first table
3197       if (group_list || tmp_table_param.sum_func_count)
3198       {
3199         if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true, true))
3200           DBUG_RETURN(true);
3201         if (prepare_sum_aggregators(sum_funcs,
3202                                     !join_tab->is_using_agg_loose_index_scan()))
3203           DBUG_RETURN(true);
3204         group_list= NULL;
3205         if (setup_sum_funcs(thd, sum_funcs))
3206           DBUG_RETURN(true);
3207       }
3208       // No sum funcs anymore
3209       DBUG_ASSERT(items2.is_null());
3210 
3211       items2= ref_ptr_array_slice(3);
3212       if (change_to_use_tmp_fields(thd, items2,
3213                                    tmp_fields_list2, tmp_all_fields2,
3214                                    fields_list.elements, tmp_all_fields1))
3215         DBUG_RETURN(true);
3216 
3217       curr_fields_list= &tmp_fields_list2;
3218       curr_all_fields= &tmp_all_fields2;
3219       set_items_ref_array(items2);
3220       curr_tab->ref_array= &items2;
3221       curr_tab->all_fields= &tmp_all_fields2;
3222       curr_tab->fields= &tmp_fields_list2;
3223       set_postjoin_aggr_write_func(curr_tab);
3224 
3225       tmp_table_param.field_count+= tmp_table_param.sum_func_count;
3226       tmp_table_param.sum_func_count= 0;
3227     }
3228     if (curr_tab->table->distinct)
3229       select_distinct= false;               /* Each row is unique */
3230 
3231     if (select_distinct && !group_list)
3232     {
3233       if (having)
3234       {
3235         curr_tab->having= having;
3236         having->update_used_tables();
3237       }
3238       /*
3239         We only need DISTINCT operation if the join is not degenerate.
3240         If it is, we must not request DISTINCT processing, because
3241         remove_duplicates() assumes there is a preceding computation step (and
3242         in the degenerate join, there's none)
3243       */
3244       if (top_join_tab_count && tables_list)
3245         curr_tab->distinct= true;
3246 
3247       having= NULL;
3248       select_distinct= false;
3249     }
3250     /* Clean tmp_table_param for the next tmp table. */
3251     tmp_table_param.field_count= tmp_table_param.sum_func_count=
3252       tmp_table_param.func_count= 0;
3253 
3254     tmp_table_param.copy_field= tmp_table_param.copy_field_end=0;
3255     first_record= sort_and_group=0;
3256 
3257     if (!group_optimized_away || implicit_grouping_with_window_funcs)
3258     {
3259       group= false;
3260     }
3261     else
3262     {
3263       /*
3264         If grouping has been optimized away, a temporary table is
3265         normally not needed unless we're explicitly requested to create
3266         one (e.g. due to a SQL_BUFFER_RESULT hint or INSERT ... SELECT).
3267 
3268         In this case (grouping was optimized away), temp_table was
3269         created without a grouping expression and JOIN::exec() will not
3270         perform the necessary grouping (by the use of end_send_group()
3271         or end_write_group()) if JOIN::group is set to false.
3272       */
3273       // the temporary table was explicitly requested
3274       DBUG_ASSERT(MY_TEST(select_options & OPTION_BUFFER_RESULT));
3275       // the temporary table does not have a grouping expression
3276       DBUG_ASSERT(!curr_tab->table->group);
3277     }
3278     calc_group_buffer(this, group_list);
3279     count_field_types(select_lex, &tmp_table_param, *curr_all_fields, false);
3280   }
3281 
3282   if (group ||
3283       (implicit_grouping  && !implicit_grouping_with_window_funcs) ||
3284       tmp_table_param.sum_func_count)
3285   {
3286     if (make_group_fields(this, this))
3287       DBUG_RETURN(true);
3288 
3289     DBUG_ASSERT(items3.is_null());
3290 
3291     if (items0.is_null())
3292       init_items_ref_array();
3293     items3= ref_ptr_array_slice(4);
3294     setup_copy_fields(thd, &tmp_table_param,
3295                       items3, tmp_fields_list3, tmp_all_fields3,
3296                       curr_fields_list->elements, *curr_all_fields);
3297 
3298     curr_fields_list= &tmp_fields_list3;
3299     curr_all_fields= &tmp_all_fields3;
3300     set_items_ref_array(items3);
3301     if (join_tab)
3302     {
3303       JOIN_TAB *last_tab= join_tab + top_join_tab_count + aggr_tables - 1;
3304       // Set grouped fields on the last table
3305       last_tab->ref_array= &items3;
3306       last_tab->all_fields= &tmp_all_fields3;
3307       last_tab->fields= &tmp_fields_list3;
3308     }
3309     if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true, true))
3310       DBUG_RETURN(true);
3311     if (prepare_sum_aggregators(sum_funcs,
3312                                 !join_tab ||
3313                                 !join_tab-> is_using_agg_loose_index_scan()))
3314       DBUG_RETURN(true);
3315     if (unlikely(setup_sum_funcs(thd, sum_funcs) || thd->is_fatal_error))
3316       DBUG_RETURN(true);
3317   }
3318   if (group_list || order)
3319   {
3320     DBUG_PRINT("info",("Sorting for send_result_set_metadata"));
3321     THD_STAGE_INFO(thd, stage_sorting_result);
3322     /* If we have already done the group, add HAVING to sorted table */
3323     if (tmp_having && !is_having_added_as_table_cond &&
3324         !group_list && !sort_and_group)
3325     {
3326       if (add_having_as_table_cond(curr_tab))
3327         DBUG_RETURN(true);
3328     }
3329 
3330     if (group)
3331       select_limit= HA_POS_ERROR;
3332     else if (!need_tmp)
3333     {
3334       /*
3335         We can abort sorting after thd->select_limit rows if there are no
3336         filter conditions for any tables after the sorted one.
3337         Filter conditions come in several forms:
3338          1. as a condition item attached to the join_tab, or
3339          2. as a keyuse attached to the join_tab (ref access).
3340       */
3341       for (uint i= const_tables + 1; i < top_join_tab_count; i++)
3342       {
3343         JOIN_TAB *const tab= join_tab + i;
3344         if (tab->select_cond ||                                // 1
3345             (tab->keyuse && !tab->first_inner))                // 2
3346         {
3347           /* We have to sort all rows */
3348           select_limit= HA_POS_ERROR;
3349           break;
3350         }
3351       }
3352     }
3353     /*
3354       Here we add sorting stage for ORDER BY/GROUP BY clause, if the
3355       optimiser chose FILESORT to be faster than INDEX SCAN or there is
3356       no suitable index present.
3357       OPTION_FOUND_ROWS supersedes LIMIT and is taken into account.
3358     */
3359     DBUG_PRINT("info",("Sorting for order by/group by"));
3360     ORDER *order_arg= group_list ?  group_list : order;
3361     if (top_join_tab_count + aggr_tables > const_tables &&
3362         ordered_index_usage !=
3363         (group_list ? ordered_index_group_by : ordered_index_order_by) &&
3364         curr_tab->type != JT_CONST &&
3365         curr_tab->type != JT_EQ_REF) // Don't sort 1 row
3366     {
3367       // Sort either first non-const table or the last tmp table
3368       JOIN_TAB *sort_tab= curr_tab;
3369 
3370       if (add_sorting_to_table(sort_tab, order_arg))
3371         DBUG_RETURN(true);
3372       /*
3373         filesort_limit:	 Return only this many rows from filesort().
3374         We can use select_limit_cnt only if we have no group_by and 1 table.
3375         This allows us to use Bounded_queue for queries like:
3376           "select SQL_CALC_FOUND_ROWS * from t1 order by b desc limit 1;"
3377         m_select_limit == HA_POS_ERROR (we need a full table scan)
3378         unit->select_limit_cnt == 1 (we only need one row in the result set)
3379       */
3380       sort_tab->filesort->limit=
3381         (has_group_by || (join_tab + table_count > curr_tab + 1)) ?
3382          select_limit : unit->select_limit_cnt;
3383     }
3384     if (!only_const_tables() &&
3385         !join_tab[const_tables].filesort &&
3386         !(select_options & SELECT_DESCRIBE))
3387     {
3388       /*
3389         If no IO cache exists for the first table then we are using an
3390         INDEX SCAN and no filesort. Thus we should not remove the sorted
3391         attribute on the INDEX SCAN.
3392       */
3393       skip_sort_order= true;
3394     }
3395   }
3396 
3397   /*
3398     Window functions computation step should be attached to the last join_tab
3399     that's doing aggregation.
3400     The last join_tab reads the data from the temp. table.  It also may do
3401     - sorting
3402     - duplicate value removal
3403     Both of these operations are done after window function computation step.
3404   */
3405   curr_tab= join_tab + total_join_tab_cnt();
3406   if (select_lex->window_funcs.elements)
3407   {
3408     if (!(curr_tab->window_funcs_step= new Window_funcs_computation))
3409       DBUG_RETURN(true);
3410     if (curr_tab->window_funcs_step->setup(thd, &select_lex->window_funcs,
3411                                            curr_tab))
3412       DBUG_RETURN(true);
3413     /* Count that we're using window functions. */
3414     status_var_increment(thd->status_var.feature_window_functions);
3415   }
3416   if (select_lex->custom_agg_func_used())
3417     status_var_increment(thd->status_var.feature_custom_aggregate_functions);
3418 
3419   fields= curr_fields_list;
3420   // Reset before execution
3421   set_items_ref_array(items0);
3422   if (join_tab)
3423     join_tab[exec_join_tab_cnt() + aggr_tables - 1].next_select=
3424       setup_end_select_func(this, NULL);
3425   group= has_group_by;
3426 
3427   DBUG_RETURN(false);
3428 }
3429 
3430 
3431 
3432 bool
create_postjoin_aggr_table(JOIN_TAB * tab,List<Item> * table_fields,ORDER * table_group,bool save_sum_fields,bool distinct,bool keep_row_order)3433 JOIN::create_postjoin_aggr_table(JOIN_TAB *tab, List<Item> *table_fields,
3434                                  ORDER *table_group,
3435                                  bool save_sum_fields,
3436                                  bool distinct,
3437                                  bool keep_row_order)
3438 {
3439   DBUG_ENTER("JOIN::create_postjoin_aggr_table");
3440   THD_STAGE_INFO(thd, stage_creating_tmp_table);
3441 
3442   /*
3443     Pushing LIMIT to the post-join temporary table creation is not applicable
3444     when there is ORDER BY or GROUP BY or there is no GROUP BY, but
3445     there are aggregate functions, because in all these cases we need
3446     all result rows.
3447   */
3448   ha_rows table_rows_limit= ((order == NULL || skip_sort_order) &&
3449                               !table_group &&
3450                               !select_lex->with_sum_func) ? select_limit
3451                                                           : HA_POS_ERROR;
3452 
3453   if (!(tab->tmp_table_param= new TMP_TABLE_PARAM(tmp_table_param)))
3454     DBUG_RETURN(true);
3455   if (tmp_table_keep_current_rowid)
3456     add_fields_for_current_rowid(tab, table_fields);
3457   tab->tmp_table_param->skip_create_table= true;
3458   TABLE* table= create_tmp_table(thd, tab->tmp_table_param, *table_fields,
3459                                  table_group, distinct,
3460                                  save_sum_fields, select_options, table_rows_limit,
3461                                  &empty_clex_str, true, keep_row_order);
3462   if (!table)
3463     DBUG_RETURN(true);
3464   tmp_table_param.using_outer_summary_function=
3465     tab->tmp_table_param->using_outer_summary_function;
3466   tab->join= this;
3467   DBUG_ASSERT(tab > tab->join->join_tab || !top_join_tab_count || !tables_list);
3468   if (tab > join_tab)
3469     (tab - 1)->next_select= sub_select_postjoin_aggr;
3470   if (!(tab->aggr= new (thd->mem_root) AGGR_OP(tab)))
3471     goto err;
3472   tab->table= table;
3473   table->reginfo.join_tab= tab;
3474 
3475   /* if group or order on first table, sort first */
3476   if ((group_list && simple_group) ||
3477       (implicit_grouping && select_lex->have_window_funcs()))
3478   {
3479     DBUG_PRINT("info",("Sorting for group"));
3480     THD_STAGE_INFO(thd, stage_sorting_for_group);
3481 
3482     if (ordered_index_usage != ordered_index_group_by &&
3483         !only_const_tables() &&
3484         (join_tab + const_tables)->type != JT_CONST && // Don't sort 1 row
3485         !implicit_grouping &&
3486         add_sorting_to_table(join_tab + const_tables, group_list))
3487       goto err;
3488 
3489     if (alloc_group_fields(this, group_list))
3490       goto err;
3491     if (make_sum_func_list(all_fields, fields_list, true))
3492       goto err;
3493     if (prepare_sum_aggregators(sum_funcs,
3494                                 !(tables_list &&
3495                                   join_tab->is_using_agg_loose_index_scan())))
3496       goto err;
3497     if (setup_sum_funcs(thd, sum_funcs))
3498       goto err;
3499     group_list= NULL;
3500   }
3501   else
3502   {
3503     if (make_sum_func_list(all_fields, fields_list, false))
3504       goto err;
3505     if (prepare_sum_aggregators(sum_funcs,
3506                                 !join_tab->is_using_agg_loose_index_scan()))
3507       goto err;
3508     if (setup_sum_funcs(thd, sum_funcs))
3509       goto err;
3510 
3511     if (!group_list && !table->distinct && order && simple_order &&
3512         tab == join_tab + const_tables)
3513     {
3514       DBUG_PRINT("info",("Sorting for order"));
3515       THD_STAGE_INFO(thd, stage_sorting_for_order);
3516 
3517       if (ordered_index_usage != ordered_index_order_by &&
3518           !only_const_tables() &&
3519           add_sorting_to_table(join_tab + const_tables, order))
3520         goto err;
3521       order= NULL;
3522     }
3523   }
3524 
3525   DBUG_RETURN(false);
3526 
3527 err:
3528   if (table != NULL)
3529     free_tmp_table(thd, table);
3530   DBUG_RETURN(true);
3531 }
3532 
3533 
3534 void
optimize_distinct()3535 JOIN::optimize_distinct()
3536 {
3537   for (JOIN_TAB *last_join_tab= join_tab + top_join_tab_count - 1; ;)
3538   {
3539     if (select_lex->select_list_tables & last_join_tab->table->map ||
3540         last_join_tab->use_join_cache)
3541       break;
3542     last_join_tab->shortcut_for_distinct= true;
3543     if (last_join_tab == join_tab)
3544       break;
3545     --last_join_tab;
3546   }
3547 
3548   /* Optimize "select distinct b from t1 order by key_part_1 limit #" */
3549   if (order && skip_sort_order)
3550   {
3551     /* Should already have been optimized away */
3552     DBUG_ASSERT(ordered_index_usage == ordered_index_order_by);
3553     if (ordered_index_usage == ordered_index_order_by)
3554     {
3555       order= NULL;
3556     }
3557   }
3558 }
3559 
3560 
3561 /**
3562   @brief Add Filesort object to the given table to sort if with filesort
3563 
3564   @param tab   the JOIN_TAB object to attach created Filesort object to
3565   @param order List of expressions to sort the table by
3566 
3567   @note This function moves tab->select, if any, to filesort->select
3568 
3569   @return false on success, true on OOM
3570 */
3571 
3572 bool
add_sorting_to_table(JOIN_TAB * tab,ORDER * order)3573 JOIN::add_sorting_to_table(JOIN_TAB *tab, ORDER *order)
3574 {
3575   tab->filesort=
3576     new (thd->mem_root) Filesort(order, HA_POS_ERROR, tab->keep_current_rowid,
3577                                  tab->select);
3578   if (!tab->filesort)
3579     return true;
3580   /*
3581     Select was moved to filesort->select to force join_init_read_record to use
3582     sorted result instead of reading table through select.
3583   */
3584   if (tab->select)
3585   {
3586     tab->select= NULL;
3587     tab->set_select_cond(NULL, __LINE__);
3588   }
3589   tab->read_first_record= join_init_read_record;
3590   return false;
3591 }
3592 
3593 
3594 
3595 
3596 /**
3597   Setup expression caches for subqueries that need them
3598 
3599   @details
3600   The function wraps correlated subquery expressions that return one value
3601   into objects of the class Item_cache_wrapper setting up an expression
3602   cache for each of them. The result values of the subqueries are to be
3603   cached together with the corresponding sets of the parameters - outer
3604   references of the subqueries.
3605 
3606   @retval FALSE OK
3607   @retval TRUE  Error
3608 */
3609 
setup_subquery_caches()3610 bool JOIN::setup_subquery_caches()
3611 {
3612   DBUG_ENTER("JOIN::setup_subquery_caches");
3613 
3614   /*
3615     We have to check all this condition together because items created in
3616     one of this clauses can be moved to another one by optimizer
3617   */
3618   if (select_lex->expr_cache_may_be_used[IN_WHERE] ||
3619       select_lex->expr_cache_may_be_used[IN_HAVING] ||
3620       select_lex->expr_cache_may_be_used[IN_ON] ||
3621       select_lex->expr_cache_may_be_used[NO_MATTER])
3622   {
3623     JOIN_TAB *tab;
3624     if (conds &&
3625         !(conds= conds->transform(thd, &Item::expr_cache_insert_transformer,
3626                                   NULL)))
3627       DBUG_RETURN(TRUE);
3628     for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
3629          tab; tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
3630     {
3631       if (tab->select_cond &&
3632           !(tab->select_cond=
3633             tab->select_cond->transform(thd,
3634                                         &Item::expr_cache_insert_transformer,
3635                                         NULL)))
3636 	DBUG_RETURN(TRUE);
3637       if (tab->cache_select && tab->cache_select->cond)
3638         if (!(tab->cache_select->cond=
3639               tab->cache_select->
3640               cond->transform(thd, &Item::expr_cache_insert_transformer,
3641                               NULL)))
3642           DBUG_RETURN(TRUE);
3643     }
3644 
3645     if (having &&
3646         !(having= having->transform(thd,
3647                                     &Item::expr_cache_insert_transformer,
3648                                     NULL)))
3649       DBUG_RETURN(TRUE);
3650 
3651     if (tmp_having)
3652     {
3653       DBUG_ASSERT(having == NULL);
3654       if (!(tmp_having=
3655             tmp_having->transform(thd,
3656                                   &Item::expr_cache_insert_transformer,
3657                                   NULL)))
3658 	DBUG_RETURN(TRUE);
3659     }
3660   }
3661   if (select_lex->expr_cache_may_be_used[SELECT_LIST] ||
3662       select_lex->expr_cache_may_be_used[IN_GROUP_BY] ||
3663       select_lex->expr_cache_may_be_used[NO_MATTER])
3664   {
3665     List_iterator<Item> li(all_fields);
3666     Item *item;
3667     while ((item= li++))
3668     {
3669       Item *new_item;
3670       if (!(new_item=
3671             item->transform(thd, &Item::expr_cache_insert_transformer,
3672                             NULL)))
3673         DBUG_RETURN(TRUE);
3674       if (new_item != item)
3675       {
3676         thd->change_item_tree(li.ref(), new_item);
3677       }
3678     }
3679     for (ORDER *tmp_group= group_list; tmp_group ; tmp_group= tmp_group->next)
3680     {
3681       if (!(*tmp_group->item=
3682             (*tmp_group->item)->transform(thd,
3683                                           &Item::expr_cache_insert_transformer,
3684                                           NULL)))
3685         DBUG_RETURN(TRUE);
3686     }
3687   }
3688   if (select_lex->expr_cache_may_be_used[NO_MATTER])
3689   {
3690     for (ORDER *ord= order; ord; ord= ord->next)
3691     {
3692       if (!(*ord->item=
3693             (*ord->item)->transform(thd,
3694                                     &Item::expr_cache_insert_transformer,
3695                                     NULL)))
3696 	DBUG_RETURN(TRUE);
3697     }
3698   }
3699   DBUG_RETURN(FALSE);
3700 }
3701 
3702 
3703 /*
3704   Shrink join buffers used for preceding tables to reduce the occupied space
3705 
3706   SYNOPSIS
3707     shrink_join_buffers()
3708       jt           table up to which the buffers are to be shrunk
3709       curr_space   the size of the space used by the buffers for tables 1..jt
3710       needed_space the size of the space that has to be used by these buffers
3711 
3712   DESCRIPTION
3713     The function makes an attempt to shrink all join buffers used for the
3714     tables starting from the first up to jt to reduce the total size of the
3715     space occupied by the buffers used for tables 1,...,jt  from curr_space
3716     to needed_space.
3717     The function assumes that the buffer for the table jt has not been
3718     allocated yet.
3719 
3720   RETURN
3721     FALSE     if all buffer have been successfully shrunk
3722     TRUE      otherwise
3723 */
3724 
shrink_join_buffers(JOIN_TAB * jt,ulonglong curr_space,ulonglong needed_space)3725 bool JOIN::shrink_join_buffers(JOIN_TAB *jt,
3726                                ulonglong curr_space,
3727                                ulonglong needed_space)
3728 {
3729   JOIN_TAB *tab;
3730   JOIN_CACHE *cache;
3731   for (tab= first_linear_tab(this, WITHOUT_BUSH_ROOTS, WITHOUT_CONST_TABLES);
3732        tab != jt;
3733        tab= next_linear_tab(this, tab, WITHOUT_BUSH_ROOTS))
3734   {
3735     cache= tab->cache;
3736     if (cache)
3737     {
3738       size_t buff_size;
3739       if (needed_space < cache->get_min_join_buffer_size())
3740         return TRUE;
3741       if (cache->shrink_join_buffer_in_ratio(curr_space, needed_space))
3742       {
3743         revise_cache_usage(tab);
3744         return TRUE;
3745       }
3746       buff_size= cache->get_join_buffer_size();
3747       curr_space-= buff_size;
3748       needed_space-= buff_size;
3749     }
3750   }
3751 
3752   cache= jt->cache;
3753   DBUG_ASSERT(cache);
3754   if (needed_space < cache->get_min_join_buffer_size())
3755     return TRUE;
3756   cache->set_join_buffer_size((size_t)needed_space);
3757 
3758   return FALSE;
3759 }
3760 
3761 
3762 int
reinit()3763 JOIN::reinit()
3764 {
3765   DBUG_ENTER("JOIN::reinit");
3766 
3767   unit->offset_limit_cnt= (ha_rows)(select_lex->offset_limit ?
3768                                     select_lex->offset_limit->val_uint() : 0);
3769 
3770   first_record= false;
3771   group_sent= false;
3772   cleaned= false;
3773 
3774   if (aggr_tables)
3775   {
3776     JOIN_TAB *curr_tab= join_tab + exec_join_tab_cnt();
3777     JOIN_TAB *end_tab= curr_tab + aggr_tables;
3778     for ( ; curr_tab < end_tab; curr_tab++)
3779     {
3780       TABLE *tmp_table= curr_tab->table;
3781       if (!tmp_table->is_created())
3782         continue;
3783       tmp_table->file->extra(HA_EXTRA_RESET_STATE);
3784       tmp_table->file->ha_delete_all_rows();
3785     }
3786   }
3787   clear_sj_tmp_tables(this);
3788   if (current_ref_ptrs != items0)
3789   {
3790     set_items_ref_array(items0);
3791     set_group_rpa= false;
3792   }
3793 
3794   /* need to reset ref access state (see join_read_key) */
3795   if (join_tab)
3796   {
3797     JOIN_TAB *tab;
3798     for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITH_CONST_TABLES); tab;
3799          tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
3800     {
3801       tab->ref.key_err= TRUE;
3802     }
3803   }
3804 
3805   /* Reset of sum functions */
3806   if (sum_funcs)
3807   {
3808     Item_sum *func, **func_ptr= sum_funcs;
3809     while ((func= *(func_ptr++)))
3810       func->clear();
3811   }
3812 
3813   if (no_rows_in_result_called)
3814   {
3815     /* Reset effect of possible no_rows_in_result() */
3816     List_iterator_fast<Item> it(fields_list);
3817     Item *item;
3818     no_rows_in_result_called= 0;
3819     while ((item= it++))
3820       item->restore_to_before_no_rows_in_result();
3821   }
3822 
3823   if (!(select_options & SELECT_DESCRIBE))
3824     if (init_ftfuncs(thd, select_lex, MY_TEST(order)))
3825       DBUG_RETURN(1);
3826 
3827   DBUG_RETURN(0);
3828 }
3829 
3830 
3831 /**
3832   Prepare join result.
3833 
3834   @details Prepare join result prior to join execution or describing.
3835   Instantiate derived tables and get schema tables result if necessary.
3836 
3837   @return
3838     TRUE  An error during derived or schema tables instantiation.
3839     FALSE Ok
3840 */
3841 
prepare_result(List<Item> ** columns_list)3842 bool JOIN::prepare_result(List<Item> **columns_list)
3843 {
3844   DBUG_ENTER("JOIN::prepare_result");
3845 
3846   error= 0;
3847   /* Create result tables for materialized views. */
3848   if (!zero_result_cause &&
3849       select_lex->handle_derived(thd->lex, DT_CREATE))
3850     goto err;
3851 
3852   if (result->prepare2(this))
3853     goto err;
3854 
3855   if ((select_lex->options & OPTION_SCHEMA_TABLE) &&
3856       get_schema_tables_result(this, PROCESSED_BY_JOIN_EXEC))
3857     goto err;
3858 
3859   DBUG_RETURN(FALSE);
3860 
3861 err:
3862   error= 1;
3863   DBUG_RETURN(TRUE);
3864 }
3865 
3866 
3867 /**
3868    @retval
3869    0 ok
3870    1 error
3871 */
3872 
3873 
save_explain_data(Explain_query * output,bool can_overwrite,bool need_tmp_table,bool need_order,bool distinct)3874 bool JOIN::save_explain_data(Explain_query *output, bool can_overwrite,
3875                              bool need_tmp_table, bool need_order,
3876                              bool distinct)
3877 {
3878   /*
3879     If there is SELECT in this statement with the same number it must be the
3880     same SELECT
3881   */
3882   DBUG_SLOW_ASSERT(select_lex->select_number == UINT_MAX ||
3883               select_lex->select_number == INT_MAX ||
3884               !output ||
3885               !output->get_select(select_lex->select_number) ||
3886               output->get_select(select_lex->select_number)->select_lex ==
3887                 select_lex);
3888 
3889   if (select_lex->select_number != UINT_MAX &&
3890       select_lex->select_number != INT_MAX /* this is not a UNION's "fake select */ &&
3891       have_query_plan != JOIN::QEP_NOT_PRESENT_YET &&
3892       have_query_plan != JOIN::QEP_DELETED &&  // this happens when there was
3893                                                // no QEP ever, but then
3894                                                //cleanup() is called multiple times
3895       output && // for "SET" command in SPs.
3896       (can_overwrite? true: !output->get_select(select_lex->select_number)))
3897   {
3898     const char *message= NULL;
3899     if (!table_count || !tables_list || zero_result_cause)
3900     {
3901       /* It's a degenerate join */
3902       message= zero_result_cause ? zero_result_cause : "No tables used";
3903     }
3904     return save_explain_data_intern(thd->lex->explain, need_tmp_table, need_order,
3905                                     distinct, message);
3906   }
3907 
3908   /*
3909     Can have join_tab==NULL for degenerate cases (e.g. SELECT .. UNION ... SELECT LIMIT 0)
3910   */
3911   if (select_lex == select_lex->master_unit()->fake_select_lex && join_tab)
3912   {
3913     /*
3914       This is fake_select_lex. It has no query plan, but we need to set up a
3915       tracker for ANALYZE
3916     */
3917     uint nr= select_lex->master_unit()->first_select()->select_number;
3918     Explain_union *eu= output->get_union(nr);
3919     explain= &eu->fake_select_lex_explain;
3920     join_tab[0].tracker= eu->get_fake_select_lex_tracker();
3921     for (uint i=0 ; i < exec_join_tab_cnt() + aggr_tables; i++)
3922     {
3923       if (join_tab[i].filesort)
3924       {
3925         if (!(join_tab[i].filesort->tracker=
3926               new Filesort_tracker(thd->lex->analyze_stmt)))
3927           return 1;
3928       }
3929     }
3930   }
3931   return 0;
3932 }
3933 
3934 
exec()3935 void JOIN::exec()
3936 {
3937   DBUG_EXECUTE_IF("show_explain_probe_join_exec_start",
3938                   if (dbug_user_var_equals_int(thd,
3939                                                "show_explain_probe_select_id",
3940                                                select_lex->select_number))
3941                         dbug_serve_apcs(thd, 1);
3942                  );
3943   ANALYZE_START_TRACKING(&explain->time_tracker);
3944   exec_inner();
3945   ANALYZE_STOP_TRACKING(&explain->time_tracker);
3946 
3947   DBUG_EXECUTE_IF("show_explain_probe_join_exec_end",
3948                   if (dbug_user_var_equals_int(thd,
3949                                                "show_explain_probe_select_id",
3950                                                select_lex->select_number))
3951                         dbug_serve_apcs(thd, 1);
3952                  );
3953 }
3954 
3955 
exec_inner()3956 void JOIN::exec_inner()
3957 {
3958   List<Item> *columns_list= &fields_list;
3959   DBUG_ENTER("JOIN::exec_inner");
3960   DBUG_ASSERT(optimization_state == JOIN::OPTIMIZATION_DONE);
3961 
3962   THD_STAGE_INFO(thd, stage_executing);
3963 
3964   /*
3965     Enable LIMIT ROWS EXAMINED during query execution if:
3966     (1) This JOIN is the outermost query (not a subquery or derived table)
3967         This ensures that the limit is enabled when actual execution begins, and
3968         not if a subquery is evaluated during optimization of the outer query.
3969     (2) This JOIN is not the result of a UNION. In this case do not apply the
3970         limit in order to produce the partial query result stored in the
3971         UNION temp table.
3972   */
3973   if (!select_lex->outer_select() &&                            // (1)
3974       select_lex != select_lex->master_unit()->fake_select_lex) // (2)
3975     thd->lex->set_limit_rows_examined();
3976 
3977   if (procedure)
3978   {
3979     procedure_fields_list= fields_list;
3980     if (procedure->change_columns(thd, procedure_fields_list) ||
3981 	result->prepare(procedure_fields_list, unit))
3982     {
3983       thd->set_examined_row_count(0);
3984       thd->limit_found_rows= 0;
3985       DBUG_VOID_RETURN;
3986     }
3987     columns_list= &procedure_fields_list;
3988   }
3989   if (result->prepare2(this))
3990     DBUG_VOID_RETURN;
3991 
3992   if (!tables_list && (table_count || !select_lex->with_sum_func) &&
3993       !select_lex->have_window_funcs())
3994   {                                           // Only test of functions
3995     if (select_options & SELECT_DESCRIBE)
3996       select_describe(this, FALSE, FALSE, FALSE,
3997 		      (zero_result_cause?zero_result_cause:"No tables used"));
3998 
3999     else
4000     {
4001       if (result->send_result_set_metadata(*columns_list,
4002                                            Protocol::SEND_NUM_ROWS |
4003                                            Protocol::SEND_EOF))
4004       {
4005         DBUG_VOID_RETURN;
4006       }
4007 
4008       /*
4009         We have to test for 'conds' here as the WHERE may not be constant
4010         even if we don't have any tables for prepared statements or if
4011         conds uses something like 'rand()'.
4012         If the HAVING clause is either impossible or always true, then
4013         JOIN::having is set to NULL by optimize_cond.
4014         In this case JOIN::exec must check for JOIN::having_value, in the
4015         same way it checks for JOIN::cond_value.
4016       */
4017       DBUG_ASSERT(error == 0);
4018       if (cond_value != Item::COND_FALSE &&
4019           having_value != Item::COND_FALSE &&
4020           (!conds || conds->val_int()) &&
4021           (!having || having->val_int()))
4022       {
4023 	if (do_send_rows &&
4024             (procedure ? (procedure->send_row(procedure_fields_list) ||
4025              procedure->end_of_records()) : result->send_data(fields_list)> 0))
4026 	  error= 1;
4027 	else
4028 	  send_records= ((select_options & OPTION_FOUND_ROWS) ? 1 :
4029                          thd->get_sent_row_count());
4030       }
4031       else
4032         send_records= 0;
4033       if (likely(!error))
4034       {
4035         join_free();                      // Unlock all cursors
4036         error= (int) result->send_eof();
4037       }
4038     }
4039     /* Single select (without union) always returns 0 or 1 row */
4040     thd->limit_found_rows= send_records;
4041     thd->set_examined_row_count(0);
4042     DBUG_VOID_RETURN;
4043   }
4044 
4045   /*
4046     Evaluate expensive constant conditions that were not evaluated during
4047     optimization. Do not evaluate them for EXPLAIN statements as these
4048     condtions may be arbitrarily costly, and because the optimize phase
4049     might not have produced a complete executable plan for EXPLAINs.
4050   */
4051   if (!zero_result_cause &&
4052       exec_const_cond && !(select_options & SELECT_DESCRIBE) &&
4053       !exec_const_cond->val_int())
4054     zero_result_cause= "Impossible WHERE noticed after reading const tables";
4055 
4056   /*
4057     We've called exec_const_cond->val_int(). This may have caused an error.
4058   */
4059   if (unlikely(thd->is_error()))
4060   {
4061     error= thd->is_error();
4062     DBUG_VOID_RETURN;
4063   }
4064 
4065   if (zero_result_cause)
4066   {
4067     if (select_lex->have_window_funcs() && send_row_on_empty_set())
4068     {
4069       /*
4070         The query produces just one row but it has window functions.
4071 
4072         The only way to compute the value of window function(s) is to
4073         run the entire window function computation step (there is no shortcut).
4074       */
4075       const_tables= table_count;
4076       first_select= sub_select_postjoin_aggr;
4077     }
4078     else
4079     {
4080       (void) return_zero_rows(this, result, select_lex->leaf_tables,
4081                               *columns_list,
4082 			      send_row_on_empty_set(),
4083 			      select_options,
4084 			      zero_result_cause,
4085 			      having ? having : tmp_having, all_fields);
4086       DBUG_VOID_RETURN;
4087     }
4088   }
4089 
4090   /*
4091     Evaluate all constant expressions with subqueries in the
4092     ORDER/GROUP clauses to make sure that all subqueries return a
4093     single row. The evaluation itself will trigger an error if that is
4094     not the case.
4095   */
4096   if (exec_const_order_group_cond.elements &&
4097       !(select_options & SELECT_DESCRIBE))
4098   {
4099     List_iterator_fast<Item> const_item_it(exec_const_order_group_cond);
4100     Item *cur_const_item;
4101     while ((cur_const_item= const_item_it++))
4102     {
4103       cur_const_item->val_str(); // This caches val_str() to Item::str_value
4104       if (unlikely(thd->is_error()))
4105       {
4106         error= thd->is_error();
4107         DBUG_VOID_RETURN;
4108       }
4109     }
4110   }
4111 
4112   if ((this->select_lex->options & OPTION_SCHEMA_TABLE) &&
4113       get_schema_tables_result(this, PROCESSED_BY_JOIN_EXEC))
4114     DBUG_VOID_RETURN;
4115 
4116   if (select_options & SELECT_DESCRIBE)
4117   {
4118     select_describe(this, need_tmp,
4119 		    order != 0 && !skip_sort_order,
4120 		    select_distinct,
4121                     !table_count ? "No tables used" : NullS);
4122     DBUG_VOID_RETURN;
4123   }
4124   else
4125   {
4126     /* it's a const select, materialize it. */
4127     select_lex->mark_const_derived(zero_result_cause);
4128   }
4129 
4130   /*
4131     Initialize examined rows here because the values from all join parts
4132     must be accumulated in examined_row_count. Hence every join
4133     iteration must count from zero.
4134   */
4135   join_examined_rows= 0;
4136 
4137   /* XXX: When can we have here thd->is_error() not zero? */
4138   if (unlikely(thd->is_error()))
4139   {
4140     error= thd->is_error();
4141     DBUG_VOID_RETURN;
4142   }
4143 
4144   THD_STAGE_INFO(thd, stage_sending_data);
4145   DBUG_PRINT("info", ("%s", thd->proc_info));
4146   result->send_result_set_metadata(
4147                  procedure ? procedure_fields_list : *fields,
4148                  Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF);
4149 
4150   error= result->view_structure_only() ? false : do_select(this, procedure);
4151   /* Accumulate the counts from all join iterations of all join parts. */
4152   thd->inc_examined_row_count(join_examined_rows);
4153   DBUG_PRINT("counts", ("thd->examined_row_count: %lu",
4154                         (ulong) thd->get_examined_row_count()));
4155 
4156   DBUG_VOID_RETURN;
4157 }
4158 
4159 
4160 /**
4161   Clean up join.
4162 
4163   @return
4164     Return error that hold JOIN.
4165 */
4166 
4167 int
destroy()4168 JOIN::destroy()
4169 {
4170   DBUG_ENTER("JOIN::destroy");
4171   select_lex->join= 0;
4172 
4173   cond_equal= 0;
4174   having_equal= 0;
4175 
4176   cleanup(1);
4177 
4178   if (join_tab)
4179   {
4180     for (JOIN_TAB *tab= first_linear_tab(this, WITH_BUSH_ROOTS,
4181                                          WITH_CONST_TABLES);
4182          tab; tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
4183     {
4184       if (tab->aggr)
4185       {
4186         free_tmp_table(thd, tab->table);
4187         delete tab->tmp_table_param;
4188         tab->tmp_table_param= NULL;
4189         tab->aggr= NULL;
4190       }
4191       tab->table= NULL;
4192     }
4193   }
4194 
4195   /* Cleanup items referencing temporary table columns */
4196   cleanup_item_list(tmp_all_fields1);
4197   cleanup_item_list(tmp_all_fields3);
4198   destroy_sj_tmp_tables(this);
4199   delete_dynamic(&keyuse);
4200   if (save_qep)
4201     delete(save_qep);
4202   if (ext_keyuses_for_splitting)
4203     delete(ext_keyuses_for_splitting);
4204   delete procedure;
4205   DBUG_RETURN(error);
4206 }
4207 
4208 
cleanup_item_list(List<Item> & items) const4209 void JOIN::cleanup_item_list(List<Item> &items) const
4210 {
4211   DBUG_ENTER("JOIN::cleanup_item_list");
4212   if (!items.is_empty())
4213   {
4214     List_iterator_fast<Item> it(items);
4215     Item *item;
4216     while ((item= it++))
4217       item->cleanup();
4218   }
4219   DBUG_VOID_RETURN;
4220 }
4221 
4222 
/**
  An entry point to single-unit select (a select without UNION).

  If select_lex already has a JOIN attached, that JOIN is reused
  (re-initialized or re-prepared depending on select_lex->linkage);
  otherwise a new JOIN is allocated on thd->mem_root and prepared.
  The JOIN is then optimized and executed.

  @param thd                  thread handler
  @param tables               list of all tables used in this query.
                              The tables have been pre-opened.
  @param wild_num             number of wildcards used in the top level
                              select of this query.
                              For example statement
                              SELECT *, t1.*, catalog.t2.* FROM t0, t1, t2;
                              has 3 wildcards.
  @param fields               list of items in SELECT list of the top-level
                              select
                              e.g. SELECT a, b, c FROM t1 will have Item_field
                              for a, b and c in this list.
  @param conds                top level item of an expression representing
                              WHERE clause of the top level select
  @param og_num               total number of ORDER BY and GROUP BY clauses
                              arguments
  @param order                linked list of ORDER BY arguments
  @param group                linked list of GROUP BY arguments
  @param having               top level item of HAVING expression
  @param proc_param           list of PROCEDUREs
  @param select_options       select options (BIG_RESULT, etc)
  @param result               an instance of result set handling class.
                              This object is responsible for sending result
                              set rows to the client or inserting them
                              into a table.
  @param unit                 top-level UNIT of this query
                              UNIT is an artificial object created by the
                              parser for every SELECT clause.
                              e.g.
                              SELECT * FROM t1 WHERE a1 IN (SELECT * FROM t2)
                              has 2 units.
  @param select_lex           the only SELECT_LEX of this query

  @note
    When the JOIN is freed here (free_join is set) the result is
    "err || thd->is_error()"; otherwise it is join->error if that is
    non-zero, else err.

  @retval
    FALSE  success
  @retval
    TRUE   an error
*/

bool
mysql_select(THD *thd,
	     TABLE_LIST *tables, uint wild_num, List<Item> &fields,
	     COND *conds, uint og_num,  ORDER *order, ORDER *group,
	     Item *having, ORDER *proc_param, ulonglong select_options,
	     select_result *result, SELECT_LEX_UNIT *unit,
	     SELECT_LEX *select_lex)
{
  int err= 0;
  /* Whether select_lex->cleanup() is run at the "err" label below */
  bool free_join= 1;
  DBUG_ENTER("mysql_select");

  if (!fields.is_empty())
    select_lex->context.resolve_in_select_list= true;
  JOIN *join;
  if (select_lex->join != 0)
  {
    /* A JOIN is already attached to this SELECT_LEX: reuse it */
    join= select_lex->join;
    /*
      Is it a single SELECT in a derived table, called during derived table
      creation? If so (and this is not EXPLAIN), skip the re-initialization
      below.
    */
    if (select_lex->linkage != DERIVED_TABLE_TYPE ||
	(select_options & SELECT_DESCRIBE))
    {
      if (select_lex->linkage != GLOBAL_OPTIONS_TYPE)
      {
        /*
          Original join tabs might be overwritten at first
          subselect execution. So we need to restore them.
        */
        Item_subselect *subselect= select_lex->master_unit()->item;
        if (subselect && subselect->is_uncacheable() && join->reinit())
          DBUG_RETURN(TRUE);
      }
      else
      {
        /*
          Note: free_join is still 1 when this goto is taken, so the "err"
          label runs select_lex->cleanup() on this path.
        */
        if ((err= join->prepare( tables, wild_num,
                                conds, og_num, order, false, group, having,
                                proc_param, select_lex, unit)))
	{
	  goto err;
	}
      }
    }
    /* The reused JOIN is not cleaned up by this call */
    free_join= 0;
    join->select_options= select_options;
  }
  else
  {
    if (thd->lex->describe)
      select_options|= SELECT_DESCRIBE;

    /*
      When in EXPLAIN, delay deleting the joins so that they are still
      available when we're producing EXPLAIN EXTENDED warning text.
    */
    if (select_options & SELECT_DESCRIBE)
      free_join= 0;

    if (!(join= new (thd->mem_root) JOIN(thd, fields, select_options, result)))
	DBUG_RETURN(TRUE);
    THD_STAGE_INFO(thd, stage_init);
    thd->lex->used_tables=0;
    if ((err= join->prepare(tables, wild_num,
                            conds, og_num, order, false, group, having, proc_param,
                            select_lex, unit)))
    {
      goto err;
    }
  }

  if ((err= join->optimize()))
  {
    goto err;					// 1
  }

  /*
    For EXPLAIN EXTENDED: remember the conditions as they are after
    optimize(); they are copied back into select_lex after exec().
  */
  if (thd->lex->describe & DESCRIBE_EXTENDED)
  {
    join->conds_history= join->conds;
    join->having_history= (join->having?join->having:join->tmp_having);
  }

  /* optimize() returned 0 but an error may still have been raised */
  if (unlikely(thd->is_error()))
    goto err;

  join->exec();

  if (thd->lex->describe & DESCRIBE_EXTENDED)
  {
    select_lex->where= join->conds_history;
    select_lex->having= join->having_history;
  }

  /* Reached on both the success path and every "goto err" above */
err:
  if (free_join)
  {
    THD_STAGE_INFO(thd, stage_end);
    err|= (int)(select_lex->cleanup());
    DBUG_RETURN(err || thd->is_error());
  }
  DBUG_RETURN(join->error ? join->error: err);
}
4370 
4371 
4372 /*****************************************************************************
4373   Create JOIN_TABS, make a guess about the table types,
4374   Approximate how many records will be used in each table
4375 *****************************************************************************/
4376 
get_quick_record_count(THD * thd,SQL_SELECT * select,TABLE * table,const key_map * keys,ha_rows limit)4377 static ha_rows get_quick_record_count(THD *thd, SQL_SELECT *select,
4378 				      TABLE *table,
4379 				      const key_map *keys,ha_rows limit)
4380 {
4381   int error;
4382   DBUG_ENTER("get_quick_record_count");
4383   uchar buff[STACK_BUFF_ALLOC];
4384   if (unlikely(check_stack_overrun(thd, STACK_MIN_SIZE, buff)))
4385     DBUG_RETURN(0);                           // Fatal error flag is set
4386   if (select)
4387   {
4388     select->head=table;
4389     table->reginfo.impossible_range=0;
4390     if (likely((error=
4391                 select->test_quick_select(thd, *(key_map *)keys,
4392                                           (table_map) 0,
4393                                           limit, 0, FALSE,
4394                                           TRUE /* remove_where_parts*/)) ==
4395                1))
4396       DBUG_RETURN(select->quick->records);
4397     if (unlikely(error == -1))
4398     {
4399       table->reginfo.impossible_range=1;
4400       DBUG_RETURN(0);
4401     }
4402     DBUG_PRINT("warning",("Couldn't use record count on const keypart"));
4403   }
4404   DBUG_RETURN(HA_POS_ERROR);			/* This shouldn't happend */
4405 }
4406 
/*
   This structure is used to collect info on potentially sargable
   predicates in order to check whether they become sargable after
   reading const tables.
   We form a bitmap of indexes that can be used for sargable predicates.
   Only such indexes are involved in range analysis.

   make_join_statistics() walks an array of these entries until it reaches
   one whose 'field' is NULL. For each entry whose arg_value items are all
   const_item(), the keys starting with 'field' (restricted to
   keys_in_use_for_query) are merged into the const_keys of the field's
   join tab.
*/
struct SARGABLE_PARAM
{
  Field *field;              /* field against which to check sargability */
  Item **arg_value;          /* values of potential keys for lookups     */
  uint num_values;           /* number of values in the above array      */
};
4420 
4421 
4422 /*
4423   Mark all tables inside a join nest as constant.
4424 
4425   @detail  This is called when there is a local "Impossible WHERE" inside
4426            a multi-table LEFT JOIN.
4427 */
4428 
mark_join_nest_as_const(JOIN * join,TABLE_LIST * join_nest,table_map * found_const_table_map,uint * const_count)4429 void mark_join_nest_as_const(JOIN *join,
4430                              TABLE_LIST *join_nest,
4431                              table_map *found_const_table_map,
4432                              uint *const_count)
4433 {
4434   List_iterator<TABLE_LIST> it(join_nest->nested_join->join_list);
4435   TABLE_LIST *tbl;
4436   while ((tbl= it++))
4437   {
4438     if (tbl->nested_join)
4439     {
4440       mark_join_nest_as_const(join, tbl, found_const_table_map, const_count);
4441       continue;
4442     }
4443     JOIN_TAB *tab= tbl->table->reginfo.join_tab;
4444 
4445     if (!(join->const_table_map & tab->table->map))
4446     {
4447       tab->type= JT_CONST;
4448       tab->info= ET_IMPOSSIBLE_ON_CONDITION;
4449       tab->table->const_table= 1;
4450 
4451       join->const_table_map|= tab->table->map;
4452       *found_const_table_map|= tab->table->map;
4453       set_position(join,(*const_count)++,tab,(KEYUSE*) 0);
4454       mark_as_null_row(tab->table);		// All fields are NULL
4455     }
4456   }
4457 }
4458 
4459 
4460 /*
4461   @brief Get the condition that can be used to do range analysis/partition
4462     pruning/etc
4463 
4464   @detail
4465     Figure out which condition we can use:
4466     - For INNER JOIN, we use the WHERE,
4467     - "t1 LEFT JOIN t2 ON ..." uses t2's ON expression
4468     - "t1 LEFT JOIN (...) ON ..." uses the join nest's ON expression.
4469 */
4470 
get_sargable_cond(JOIN * join,TABLE * table)4471 static Item **get_sargable_cond(JOIN *join, TABLE *table)
4472 {
4473   Item **retval;
4474   if (table->pos_in_table_list->on_expr)
4475   {
4476     /*
4477       This is an inner table from a single-table LEFT JOIN, "t1 LEFT JOIN
4478       t2 ON cond". Use the condition cond.
4479     */
4480     retval= &table->pos_in_table_list->on_expr;
4481   }
4482   else if (table->pos_in_table_list->embedding &&
4483            !table->pos_in_table_list->embedding->sj_on_expr)
4484   {
4485     /*
4486       This is the inner side of a multi-table outer join. Use the
4487       appropriate ON expression.
4488     */
4489     retval= &(table->pos_in_table_list->embedding->on_expr);
4490   }
4491   else
4492   {
4493     /* The table is not inner wrt some LEFT JOIN. Use the WHERE clause */
4494     retval= &join->conds;
4495   }
4496   return retval;
4497 }
4498 
4499 
4500 /**
4501   Calculate the best possible join and initialize the join structure.
4502 
4503   @retval
4504     0	ok
4505   @retval
4506     1	Fatal error
4507 */
4508 
4509 static bool
make_join_statistics(JOIN * join,List<TABLE_LIST> & tables_list,DYNAMIC_ARRAY * keyuse_array)4510 make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
4511                      DYNAMIC_ARRAY *keyuse_array)
4512 {
4513   int error= 0;
4514   TABLE *UNINIT_VAR(table); /* inited in all loops */
4515   uint i,table_count,const_count,key;
4516   table_map found_const_table_map, all_table_map;
4517   key_map const_ref, eq_part;
4518   bool has_expensive_keyparts;
4519   TABLE **table_vector;
4520   JOIN_TAB *stat,*stat_end,*s,**stat_ref, **stat_vector;
4521   KEYUSE *keyuse,*start_keyuse;
4522   table_map outer_join=0;
4523   table_map no_rows_const_tables= 0;
4524   SARGABLE_PARAM *sargables= 0;
4525   List_iterator<TABLE_LIST> ti(tables_list);
4526   TABLE_LIST *tables;
4527   DBUG_ENTER("make_join_statistics");
4528 
4529   table_count=join->table_count;
4530 
4531   /*
4532     best_positions is ok to allocate with alloc() as we copy things to it with
4533     memcpy()
4534   */
4535 
4536   if (!multi_alloc_root(join->thd->mem_root,
4537                         &stat, sizeof(JOIN_TAB)*(table_count),
4538                         &stat_ref, sizeof(JOIN_TAB*)* MAX_TABLES,
4539                         &stat_vector, sizeof(JOIN_TAB*)* (table_count +1),
4540                         &table_vector, sizeof(TABLE*)*(table_count*2),
4541                         &join->positions, sizeof(POSITION)*(table_count + 1),
4542                         &join->best_positions,
4543                         sizeof(POSITION)*(table_count + 1),
4544                         NullS))
4545     DBUG_RETURN(1);
4546 
4547   /* The following should be optimized to only clear critical things */
4548   bzero((void*)stat, sizeof(JOIN_TAB)* table_count);
4549   /* Initialize POSITION objects */
4550   for (i=0 ; i <= table_count ; i++)
4551     (void) new ((char*) (join->positions + i)) POSITION;
4552 
4553   join->best_ref= stat_vector;
4554 
4555   stat_end=stat+table_count;
4556   found_const_table_map= all_table_map=0;
4557   const_count=0;
4558 
4559   for (s= stat, i= 0; (tables= ti++); s++, i++)
4560   {
4561     TABLE_LIST *embedding= tables->embedding;
4562     stat_vector[i]=s;
4563     s->keys.init();
4564     s->const_keys.init();
4565     s->checked_keys.init();
4566     s->needed_reg.init();
4567     table_vector[i]=s->table=table=tables->table;
4568     s->tab_list= tables;
4569     table->pos_in_table_list= tables;
4570     error= tables->fetch_number_of_rows();
4571     set_statistics_for_table(join->thd, table);
4572     bitmap_clear_all(&table->cond_set);
4573 
4574 #ifdef WITH_PARTITION_STORAGE_ENGINE
4575     const bool all_partitions_pruned_away= table->all_partitions_pruned_away;
4576 #else
4577     const bool all_partitions_pruned_away= FALSE;
4578 #endif
4579 
4580     DBUG_EXECUTE_IF("bug11747970_raise_error",
4581                     { join->thd->set_killed(KILL_QUERY_HARD); });
4582     if (unlikely(error))
4583     {
4584       table->file->print_error(error, MYF(0));
4585       goto error;
4586     }
4587     table->quick_keys.clear_all();
4588     table->intersect_keys.clear_all();
4589     table->reginfo.join_tab=s;
4590     table->reginfo.not_exists_optimize=0;
4591     bzero((char*) table->const_key_parts, sizeof(key_part_map)*table->s->keys);
4592     all_table_map|= table->map;
4593     s->preread_init_done= FALSE;
4594     s->join=join;
4595 
4596     s->dependent= tables->dep_tables;
4597     if (tables->schema_table)
4598       table->file->stats.records= table->used_stat_records= 2;
4599     table->quick_condition_rows= table->stat_records();
4600 
4601     s->on_expr_ref= &tables->on_expr;
4602     if (*s->on_expr_ref)
4603     {
4604       /* s is the only inner table of an outer join */
4605       if (!table->is_filled_at_execution() &&
4606           ((!table->file->stats.records &&
4607             (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT)) ||
4608            all_partitions_pruned_away) && !embedding)
4609       {						// Empty table
4610         s->dependent= 0;                        // Ignore LEFT JOIN depend.
4611         no_rows_const_tables |= table->map;
4612 	set_position(join,const_count++,s,(KEYUSE*) 0);
4613 	continue;
4614       }
4615       outer_join|= table->map;
4616       s->embedding_map= 0;
4617       for (;embedding; embedding= embedding->embedding)
4618         s->embedding_map|= embedding->nested_join->nj_map;
4619       continue;
4620     }
4621     if (embedding)
4622     {
4623       /* s belongs to a nested join, maybe to several embedded joins */
4624       s->embedding_map= 0;
4625       bool inside_an_outer_join= FALSE;
4626       do
4627       {
4628         /*
4629           If this is a semi-join nest, skip it, and proceed upwards. Maybe
4630           we're in some outer join nest
4631         */
4632         if (embedding->sj_on_expr)
4633         {
4634           embedding= embedding->embedding;
4635           continue;
4636         }
4637         inside_an_outer_join= TRUE;
4638         NESTED_JOIN *nested_join= embedding->nested_join;
4639         s->embedding_map|=nested_join->nj_map;
4640         s->dependent|= embedding->dep_tables;
4641         embedding= embedding->embedding;
4642         outer_join|= nested_join->used_tables;
4643       }
4644       while (embedding);
4645       if (inside_an_outer_join)
4646         continue;
4647     }
4648     if (!table->is_filled_at_execution() &&
4649         (table->s->system ||
4650          (table->file->stats.records <= 1 &&
4651           (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT)) ||
4652          all_partitions_pruned_away) &&
4653 	!s->dependent &&
4654         !table->fulltext_searched && !join->no_const_tables)
4655     {
4656       set_position(join,const_count++,s,(KEYUSE*) 0);
4657       no_rows_const_tables |= table->map;
4658     }
4659 
4660     /* SJ-Materialization handling: */
4661     if (table->pos_in_table_list->jtbm_subselect &&
4662         table->pos_in_table_list->jtbm_subselect->is_jtbm_const_tab)
4663     {
4664       set_position(join,const_count++,s,(KEYUSE*) 0);
4665       no_rows_const_tables |= table->map;
4666     }
4667   }
4668 
4669   stat_vector[i]=0;
4670   join->outer_join=outer_join;
4671 
4672   if (join->outer_join)
4673   {
4674     /*
4675        Build transitive closure for relation 'to be dependent on'.
4676        This will speed up the plan search for many cases with outer joins,
       as well as allow us to catch illegal cross references.
4678        Warshall's algorithm is used to build the transitive closure.
4679        As we use bitmaps to represent the relation the complexity
4680        of the algorithm is O((number of tables)^2).
4681 
4682        The classic form of the Warshall's algorithm would look like:
4683        for (i= 0; i < table_count; i++)
4684        {
4685          for (j= 0; j < table_count; j++)
4686          {
4687            for (k= 0; k < table_count; k++)
4688            {
4689              if (bitmap_is_set(stat[j].dependent, i) &&
4690                  bitmap_is_set(stat[i].dependent, k))
4691                bitmap_set_bit(stat[j].dependent, k);
4692            }
4693          }
4694        }
4695     */
4696 
4697     for (s= stat ; s < stat_end ; s++)
4698     {
4699       table= s->table;
4700       for (JOIN_TAB *t= stat ; t < stat_end ; t++)
4701       {
4702         if (t->dependent & table->map)
4703           t->dependent |= table->reginfo.join_tab->dependent;
4704       }
4705       if (outer_join & s->table->map)
4706         s->table->maybe_null= 1;
4707     }
4708     /* Catch illegal cross references for outer joins */
4709     for (i= 0, s= stat ; i < table_count ; i++, s++)
4710     {
4711       if (s->dependent & s->table->map)
4712       {
4713         join->table_count=0;			// Don't use join->table
4714         my_message(ER_WRONG_OUTER_JOIN,
4715                    ER_THD(join->thd, ER_WRONG_OUTER_JOIN), MYF(0));
4716         goto error;
4717       }
4718       s->key_dependent= s->dependent;
4719     }
4720   }
4721 
4722   if (join->conds || outer_join)
4723   {
4724     if (update_ref_and_keys(join->thd, keyuse_array, stat, join->table_count,
4725                             join->conds, ~outer_join, join->select_lex, &sargables))
4726       goto error;
4727     /*
4728       Keyparts without prefixes may be useful if this JOIN is a subquery, and
4729       if the subquery may be executed via the IN-EXISTS strategy.
4730     */
4731     bool skip_unprefixed_keyparts=
4732       !(join->is_in_subquery() &&
4733         ((Item_in_subselect*)join->unit->item)->test_strategy(SUBS_IN_TO_EXISTS));
4734 
4735     if (keyuse_array->elements &&
4736         sort_and_filter_keyuse(join->thd, keyuse_array,
4737                                skip_unprefixed_keyparts))
4738       goto error;
4739     DBUG_EXECUTE("opt", print_keyuse_array(keyuse_array););
4740   }
4741 
4742   join->const_table_map= no_rows_const_tables;
4743   join->const_tables= const_count;
4744   eliminate_tables(join);
4745   join->const_table_map &= ~no_rows_const_tables;
4746   const_count= join->const_tables;
4747   found_const_table_map= join->const_table_map;
4748 
4749   /* Read tables with 0 or 1 rows (system tables) */
4750   for (POSITION *p_pos=join->positions, *p_end=p_pos+const_count;
4751        p_pos < p_end ;
4752        p_pos++)
4753   {
4754     s= p_pos->table;
4755     if (! (s->table->map & join->eliminated_tables))
4756     {
4757       int tmp;
4758       s->type=JT_SYSTEM;
4759       join->const_table_map|=s->table->map;
4760       if ((tmp=join_read_const_table(join->thd, s, p_pos)))
4761       {
4762         if (tmp > 0)
4763           goto error;		// Fatal error
4764       }
4765       else
4766       {
4767         found_const_table_map|= s->table->map;
4768         s->table->pos_in_table_list->optimized_away= TRUE;
4769       }
4770     }
4771   }
4772 
4773   /* loop until no more const tables are found */
4774   int ref_changed;
4775   do
4776   {
4777     ref_changed = 0;
4778   more_const_tables_found:
4779 
4780     /*
4781       We only have to loop from stat_vector + const_count as
4782       set_position() will move all const_tables first in stat_vector
4783     */
4784 
4785     for (JOIN_TAB **pos=stat_vector+const_count ; (s= *pos) ; pos++)
4786     {
4787       table=s->table;
4788 
4789       if (table->is_filled_at_execution())
4790         continue;
4791 
4792       /*
4793         If equi-join condition by a key is null rejecting and after a
4794         substitution of a const table the key value happens to be null
4795         then we can state that there are no matches for this equi-join.
4796       */
4797       if ((keyuse= s->keyuse) && *s->on_expr_ref && !s->embedding_map &&
4798          !(table->map & join->eliminated_tables))
4799       {
4800         /*
4801           When performing an outer join operation if there are no matching rows
4802           for the single row of the outer table all the inner tables are to be
4803           null complemented and thus considered as constant tables.
4804           Here we apply this consideration to the case of outer join operations
4805           with a single inner table only because the case with nested tables
4806           would require a more thorough analysis.
4807           TODO. Apply single row substitution to null complemented inner tables
4808           for nested outer join operations.
4809 	*/
4810         while (keyuse->table == table)
4811         {
4812           if (!keyuse->is_for_hash_join() &&
4813               !(keyuse->val->used_tables() & ~join->const_table_map) &&
4814               keyuse->val->is_null() && keyuse->null_rejecting)
4815           {
4816             s->type= JT_CONST;
4817             s->table->const_table= 1;
4818             mark_as_null_row(table);
4819             found_const_table_map|= table->map;
4820 	    join->const_table_map|= table->map;
4821 	    set_position(join,const_count++,s,(KEYUSE*) 0);
4822             goto more_const_tables_found;
4823            }
4824 	  keyuse++;
4825         }
4826       }
4827 
4828       if (s->dependent)				// If dependent on some table
4829       {
4830 	// All dep. must be constants
4831 	if (s->dependent & ~(found_const_table_map))
4832 	  continue;
4833 	if (table->file->stats.records <= 1L &&
4834 	    (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) &&
4835             !table->pos_in_table_list->embedding &&
4836 	      !((outer_join & table->map) &&
4837 		(*s->on_expr_ref)->is_expensive()))
4838 	{					// system table
4839 	  int tmp= 0;
4840 	  s->type=JT_SYSTEM;
4841 	  join->const_table_map|=table->map;
4842 	  set_position(join,const_count++,s,(KEYUSE*) 0);
4843 	  if ((tmp= join_read_const_table(join->thd, s, join->positions+const_count-1)))
4844 	  {
4845 	    if (tmp > 0)
4846 	      goto error;			// Fatal error
4847 	  }
4848 	  else
4849 	    found_const_table_map|= table->map;
4850 	  continue;
4851 	}
4852       }
4853       /* check if table can be read by key or table only uses const refs */
4854       if ((keyuse=s->keyuse))
4855       {
4856 	s->type= JT_REF;
4857 	while (keyuse->table == table)
4858 	{
4859           if (keyuse->is_for_hash_join())
4860 	  {
4861             keyuse++;
4862             continue;
4863           }
4864 	  start_keyuse=keyuse;
4865 	  key=keyuse->key;
4866 	  s->keys.set_bit(key);               // TODO: remove this ?
4867 
4868           const_ref.clear_all();
4869 	  eq_part.clear_all();
4870           has_expensive_keyparts= false;
4871 	  do
4872 	  {
4873             if (keyuse->val->type() != Item::NULL_ITEM &&
4874                 !keyuse->optimize &&
4875                 keyuse->keypart != FT_KEYPART)
4876 	    {
4877 	      if (!((~found_const_table_map) & keyuse->used_tables))
4878               {
4879 		const_ref.set_bit(keyuse->keypart);
4880                 if (keyuse->val->is_expensive())
4881                   has_expensive_keyparts= true;
4882               }
4883 	      eq_part.set_bit(keyuse->keypart);
4884 	    }
4885 	    keyuse++;
4886 	  } while (keyuse->table == table && keyuse->key == key);
4887 
4888           TABLE_LIST *embedding= table->pos_in_table_list->embedding;
4889           /*
4890             TODO (low priority): currently we ignore the const tables that
4891             are within a semi-join nest which is within an outer join nest.
4892             The effect of this is that we don't do const substitution for
4893             such tables.
4894           */
4895           KEY *keyinfo= table->key_info + key;
4896           uint  key_parts= table->actual_n_key_parts(keyinfo);
4897           if (eq_part.is_prefix(key_parts) &&
4898               !table->fulltext_searched &&
4899               (!embedding || (embedding->sj_on_expr && !embedding->embedding)))
4900 	  {
4901             key_map base_part, base_const_ref, base_eq_part;
4902             base_part.set_prefix(keyinfo->user_defined_key_parts);
4903             base_const_ref= const_ref;
4904             base_const_ref.intersect(base_part);
4905             base_eq_part= eq_part;
4906             base_eq_part.intersect(base_part);
4907             if (table->actual_key_flags(keyinfo) & HA_NOSAME)
4908             {
4909 
4910 	      if (base_const_ref == base_eq_part &&
4911                   !has_expensive_keyparts &&
4912                   !((outer_join & table->map) &&
4913                     (*s->on_expr_ref)->is_expensive()))
4914 	      {					// Found everything for ref.
4915 	        int tmp;
4916 	        ref_changed = 1;
4917 	        s->type= JT_CONST;
4918 	        join->const_table_map|=table->map;
4919 	        set_position(join,const_count++,s,start_keyuse);
4920                 /* create_ref_for_key will set s->table->const_table */
4921 	        if (create_ref_for_key(join, s, start_keyuse, FALSE,
4922 				       found_const_table_map))
4923                   goto error;
4924 	        if ((tmp=join_read_const_table(join->thd, s,
4925                                                join->positions+const_count-1)))
4926 	        {
4927 		  if (tmp > 0)
4928 		    goto error;			// Fatal error
4929 	        }
4930 	        else
4931 		  found_const_table_map|= table->map;
4932 	        break;
4933 	      }
4934 	    }
4935             else if (base_const_ref == base_eq_part)
4936               s->const_keys.set_bit(key);
4937           }
4938 	}
4939       }
4940     }
4941   } while (ref_changed);
4942 
4943   join->sort_by_table= get_sort_by_table(join->order, join->group_list,
4944                                          join->select_lex->leaf_tables,
4945                                          join->const_table_map);
4946   /*
4947     Update info on indexes that can be used for search lookups as
4948     reading const tables may has added new sargable predicates.
4949   */
4950   if (const_count && sargables)
4951   {
4952     for( ; sargables->field ; sargables++)
4953     {
4954       Field *field= sargables->field;
4955       JOIN_TAB *join_tab= field->table->reginfo.join_tab;
4956       key_map possible_keys= field->key_start;
4957       possible_keys.intersect(field->table->keys_in_use_for_query);
4958       bool is_const= 1;
4959       for (uint j=0; j < sargables->num_values; j++)
4960         is_const&= sargables->arg_value[j]->const_item();
4961       if (is_const)
4962         join_tab[0].const_keys.merge(possible_keys);
4963     }
4964   }
4965 
4966   join->impossible_where= false;
4967   if (join->conds && const_count)
4968   {
4969     Item* &conds= join->conds;
4970     COND_EQUAL *orig_cond_equal = join->cond_equal;
4971 
4972     conds->update_used_tables();
4973     conds= conds->remove_eq_conds(join->thd, &join->cond_value, true);
4974     if (conds && conds->type() == Item::COND_ITEM &&
4975         ((Item_cond*) conds)->functype() == Item_func::COND_AND_FUNC)
4976       join->cond_equal= &((Item_cond_and*) conds)->m_cond_equal;
4977     join->select_lex->where= conds;
4978     if (join->cond_value == Item::COND_FALSE)
4979     {
4980       join->impossible_where= true;
4981       conds= new (join->thd->mem_root) Item_int(join->thd, (longlong) 0, 1);
4982     }
4983 
4984     join->cond_equal= NULL;
4985     if (conds)
4986     {
4987       if (conds->type() == Item::COND_ITEM &&
4988 	  ((Item_cond*) conds)->functype() == Item_func::COND_AND_FUNC)
4989         join->cond_equal= (&((Item_cond_and *) conds)->m_cond_equal);
4990       else if (conds->type() == Item::FUNC_ITEM &&
4991 	       ((Item_func*) conds)->functype() == Item_func::MULT_EQUAL_FUNC)
4992       {
4993         if (!join->cond_equal)
4994           join->cond_equal= new COND_EQUAL;
4995         join->cond_equal->current_level.empty();
4996         join->cond_equal->current_level.push_back((Item_equal*) conds,
4997                                                   join->thd->mem_root);
4998       }
4999     }
5000 
5001     if (orig_cond_equal != join->cond_equal)
5002     {
5003       /*
5004         If join->cond_equal has changed all references to it from COND_EQUAL
5005         objects associated with ON expressions must be updated.
5006       */
5007       for (JOIN_TAB **pos=stat_vector+const_count ; (s= *pos) ; pos++)
5008       {
5009         if (*s->on_expr_ref && s->cond_equal &&
5010 	    s->cond_equal->upper_levels == orig_cond_equal)
5011           s->cond_equal->upper_levels= join->cond_equal;
5012       }
5013     }
5014   }
5015 
5016   /* Calc how many (possible) matched records in each table */
5017 
5018   for (s=stat ; s < stat_end ; s++)
5019   {
5020     s->startup_cost= 0;
5021     if (s->type == JT_SYSTEM || s->type == JT_CONST)
5022     {
5023       /* Only one matching row */
5024       s->found_records= s->records= 1;
5025       s->read_time=1.0;
5026       s->worst_seeks=1.0;
5027       continue;
5028     }
5029     /* Approximate found rows and time to read them */
5030     if (s->table->is_filled_at_execution())
5031     {
5032       get_delayed_table_estimates(s->table, &s->records, &s->read_time,
5033                                   &s->startup_cost);
5034       s->found_records= s->records;
5035       table->quick_condition_rows=s->records;
5036     }
5037     else
5038     {
5039        s->scan_time();
5040     }
5041 
5042     if (s->table->is_splittable())
5043       s->add_keyuses_for_splitting();
5044 
5045     /*
5046       Set a max range of how many seeks we can expect when using keys
5047       This is can't be to high as otherwise we are likely to use
5048       table scan.
5049     */
5050     s->worst_seeks= MY_MIN((double) s->found_records / 10,
5051 			(double) s->read_time*3);
5052     if (s->worst_seeks < 2.0)			// Fix for small tables
5053       s->worst_seeks=2.0;
5054 
5055     /*
5056       Add to stat->const_keys those indexes for which all group fields or
5057       all select distinct fields participate in one index.
5058     */
5059     add_group_and_distinct_keys(join, s);
5060 
5061     s->table->cond_selectivity= 1.0;
5062 
5063     /*
5064       Perform range analysis if there are keys it could use (1).
5065       Don't do range analysis for materialized subqueries (2).
5066       Don't do range analysis for materialized derived tables (3)
5067     */
5068     if ((!s->const_keys.is_clear_all() ||
5069 	 !bitmap_is_clear_all(&s->table->cond_set)) &&              // (1)
5070         !s->table->is_filled_at_execution() &&                      // (2)
5071         !(s->table->pos_in_table_list->derived &&                   // (3)
5072           s->table->pos_in_table_list->is_materialized_derived()))  // (3)
5073     {
5074       bool impossible_range= FALSE;
5075       ha_rows records= HA_POS_ERROR;
5076       SQL_SELECT *select= 0;
5077       Item **sargable_cond= NULL;
5078       if (!s->const_keys.is_clear_all())
5079       {
5080         sargable_cond= get_sargable_cond(join, s->table);
5081         bool is_sargable_cond_of_where= sargable_cond == &join->conds;
5082 
5083         select= make_select(s->table, found_const_table_map,
5084 			    found_const_table_map,
5085                             *sargable_cond,
5086                             (SORT_INFO*) 0,
5087 			    1, &error);
5088         if (!select)
5089           goto error;
5090         records= get_quick_record_count(join->thd, select, s->table,
5091 				        &s->const_keys, join->row_limit);
5092 
5093         /*
5094           Range analyzer might have modified the condition. Put it the new
5095           condition to where we got it from.
5096         */
5097         *sargable_cond= select->cond;
5098 
5099         if (is_sargable_cond_of_where &&
5100             join->conds && join->conds->type() == Item::COND_ITEM &&
5101             ((Item_cond*) (join->conds))->functype() ==
5102             Item_func::COND_AND_FUNC)
5103           join->cond_equal= &((Item_cond_and*) (join->conds))->m_cond_equal;
5104 
5105         s->quick=select->quick;
5106         s->needed_reg=select->needed_reg;
5107         select->quick=0;
5108         impossible_range= records == 0 && s->table->reginfo.impossible_range;
5109       }
5110       if (!impossible_range)
5111       {
5112         if (!sargable_cond)
5113           sargable_cond= get_sargable_cond(join, s->table);
5114         if (join->thd->variables.optimizer_use_condition_selectivity > 1)
5115           calculate_cond_selectivity_for_table(join->thd, s->table,
5116                                                sargable_cond);
5117         if (s->table->reginfo.impossible_range)
5118 	{
5119           impossible_range= TRUE;
5120           records= 0;
5121         }
5122       }
5123       if (impossible_range)
5124       {
5125         /*
5126           Impossible WHERE or ON expression
5127           In case of ON, we mark that the we match one empty NULL row.
5128           In case of WHERE, don't set found_const_table_map to get the
5129           caller to abort with a zero row result.
5130         */
5131         TABLE_LIST *emb= s->table->pos_in_table_list->embedding;
5132         if (emb && !emb->sj_on_expr)
5133         {
5134           /* Mark all tables in a multi-table join nest as const */
5135           mark_join_nest_as_const(join, emb, &found_const_table_map,
5136                                 &const_count);
5137         }
5138         else
5139         {
5140           join->const_table_map|= s->table->map;
5141           set_position(join,const_count++,s,(KEYUSE*) 0);
5142           s->type= JT_CONST;
5143           s->table->const_table= 1;
5144           if (*s->on_expr_ref)
5145           {
5146             /* Generate empty row */
5147             s->info= ET_IMPOSSIBLE_ON_CONDITION;
5148             found_const_table_map|= s->table->map;
5149             mark_as_null_row(s->table);		// All fields are NULL
5150           }
5151         }
5152       }
5153       if (records != HA_POS_ERROR)
5154       {
5155 	s->found_records=records;
5156 	s->read_time= s->quick ? s->quick->read_time : 0.0;
5157       }
5158       if (select)
5159         delete select;
5160     }
5161 
5162   }
5163 
5164   if (pull_out_semijoin_tables(join))
5165     DBUG_RETURN(TRUE);
5166 
5167   join->join_tab=stat;
5168   join->top_join_tab_count= table_count;
5169   join->map2table=stat_ref;
5170   join->table= table_vector;
5171   join->const_tables=const_count;
5172   join->found_const_table_map=found_const_table_map;
5173 
5174   if (join->const_tables != join->table_count)
5175     optimize_keyuse(join, keyuse_array);
5176 
5177   DBUG_ASSERT(!join->conds || !join->cond_equal ||
5178               !join->cond_equal->current_level.elements ||
5179               (join->conds->type() == Item::COND_ITEM &&
5180 	       ((Item_cond*) (join->conds))->functype() ==
5181                Item_func::COND_AND_FUNC &&
5182                join->cond_equal ==
5183 	       &((Item_cond_and *) (join->conds))->m_cond_equal) ||
5184               (join->conds->type() == Item::FUNC_ITEM &&
5185 	       ((Item_func*) (join->conds))->functype() ==
5186                Item_func::MULT_EQUAL_FUNC &&
5187 	       join->cond_equal->current_level.elements == 1 &&
5188                join->cond_equal->current_level.head() == join->conds));
5189 
5190   if (optimize_semijoin_nests(join, all_table_map))
5191     DBUG_RETURN(TRUE); /* purecov: inspected */
5192 
5193   {
5194     double records= 1;
5195     SELECT_LEX_UNIT *unit= join->select_lex->master_unit();
5196 
5197     /* Find an optimal join order of the non-constant tables. */
5198     if (join->const_tables != join->table_count)
5199     {
5200       if (choose_plan(join, all_table_map & ~join->const_table_map))
5201         goto error;
5202 
5203 #ifdef HAVE_valgrind
5204       // JOIN::positions holds the current query plan. We've already
5205       // made the plan choice, so we should only use JOIN::best_positions
5206       for (uint k=join->const_tables; k < join->table_count; k++)
5207         MEM_UNDEFINED(&join->positions[k], sizeof(join->positions[k]));
5208 #endif
5209     }
5210     else
5211     {
5212       memcpy((uchar*) join->best_positions,(uchar*) join->positions,
5213 	     sizeof(POSITION)*join->const_tables);
5214       join->join_record_count= 1.0;
5215       join->best_read=1.0;
5216     }
5217 
5218     if (!(join->select_options & SELECT_DESCRIBE) &&
5219         unit->derived && unit->derived->is_materialized_derived())
5220     {
5221       /*
5222         Calculate estimated number of rows for materialized derived
5223         table/view.
5224       */
5225       for (i= 0; i < join->table_count ; i++)
5226         if (double rr= join->best_positions[i].records_read)
5227           records= COST_MULT(records, rr);
5228       ha_rows rows= records > (double) HA_ROWS_MAX ? HA_ROWS_MAX : (ha_rows) records;
5229       set_if_smaller(rows, unit->select_limit_cnt);
5230       join->select_lex->increase_derived_records(rows);
5231     }
5232   }
5233 
5234   if (join->choose_subquery_plan(all_table_map & ~join->const_table_map))
5235     goto error;
5236 
5237   DEBUG_SYNC(join->thd, "inside_make_join_statistics");
5238 
5239   DBUG_RETURN(0);
5240 
5241 error:
5242   /*
5243     Need to clean up join_tab from TABLEs in case of error.
5244     They won't get cleaned up by JOIN::cleanup() because JOIN::join_tab
5245     may not be assigned yet by this function (which is building join_tab).
5246     Dangling TABLE::reginfo.join_tab may cause part_of_refkey to choke.
5247   */
5248   {
5249     TABLE_LIST *tmp_table;
5250     List_iterator<TABLE_LIST> ti2(tables_list);
5251     while ((tmp_table= ti2++))
5252       tmp_table->table->reginfo.join_tab= NULL;
5253   }
5254   DBUG_RETURN (1);
5255 }
5256 
5257 
/*****************************************************************************
  Check which keys are used and which tables reference which tables.
  Updates in stat:
	  keys	     Bitmap of all used keys
	  const_keys Bitmap of all keys which may be used with quick_select
	  keyuse     Pointer to possible keys
*****************************************************************************/
5265 
5266 
5267 /**
5268   Merge new key definitions to old ones, remove those not used in both.
5269 
5270   This is called for OR between different levels.
5271 
5272   That is, the function operates on an array of KEY_FIELD elements which has
5273   two parts:
5274 
5275                       $LEFT_PART             $RIGHT_PART
5276              +-----------------------+-----------------------+
5277             start                new_fields                 end
5278 
5279   $LEFT_PART and $RIGHT_PART are arrays that have KEY_FIELD elements for two
5280   parts of the OR condition. Our task is to produce an array of KEY_FIELD
5281   elements that would correspond to "$LEFT_PART OR $RIGHT_PART".
5282 
5283   The rules for combining elements are as follows:
5284 
5285     (keyfieldA1 AND keyfieldA2 AND ...) OR (keyfieldB1 AND keyfieldB2 AND ...)=
5286 
5287      = AND_ij (keyfieldA_i OR keyfieldB_j)
5288 
5289   We discard all (keyfieldA_i OR keyfieldB_j) that refer to different
5290   fields. For those referring to the same field, the logic is as follows:
5291 
5292     t.keycol=expr1 OR t.keycol=expr2 -> (since expr1 and expr2 are different
5293                                          we can't produce a single equality,
5294                                          so produce nothing)
5295 
5296     t.keycol=expr1 OR t.keycol=expr1 -> t.keycol=expr1
5297 
5298     t.keycol=expr1 OR t.keycol IS NULL -> t.keycol=expr1, and also set
5299                                           KEY_OPTIMIZE_REF_OR_NULL flag
5300 
5301   The last one is for ref_or_null access. We have handling for this special
5302   because it's needed for evaluating IN subqueries that are internally
5303   transformed into
5304 
5305   @code
5306     EXISTS(SELECT * FROM t1 WHERE t1.key=outer_ref_field or t1.key IS NULL)
5307   @endcode
5308 
5309   See add_key_fields() for discussion of what is and_level.
5310 
5311   KEY_FIELD::null_rejecting is processed as follows: @n
5312   result has null_rejecting=true if it is set for both ORed references.
5313   for example:
5314   -   (t2.key = t1.field OR t2.key  =  t1.field) -> null_rejecting=true
5315   -   (t2.key = t1.field OR t2.key <=> t1.field) -> null_rejecting=false
5316 
5317   @todo
5318     The result of this is that we're missing some 'ref' accesses.
5319     OptimizerTeam: Fix this
5320 */
5321 
5322 static KEY_FIELD *
merge_key_fields(KEY_FIELD * start,KEY_FIELD * new_fields,KEY_FIELD * end,uint and_level)5323 merge_key_fields(KEY_FIELD *start,KEY_FIELD *new_fields,KEY_FIELD *end,
5324 		 uint and_level)
5325 {
5326   if (start == new_fields)
5327     return start;				// Impossible or
5328   if (new_fields == end)
5329     return start;				// No new fields, skip all
5330 
5331   KEY_FIELD *first_free=new_fields;
5332 
5333   /* Mark all found fields in old array */
5334   for (; new_fields != end ; new_fields++)
5335   {
5336     for (KEY_FIELD *old=start ; old != first_free ; old++)
5337     {
5338       if (old->field == new_fields->field)
5339       {
5340         /*
5341           NOTE: below const_item() call really works as "!used_tables()", i.e.
5342           it can return FALSE where it is feasible to make it return TRUE.
5343 
5344           The cause is as follows: Some of the tables are already known to be
5345           const tables (the detection code is in make_join_statistics(),
5346           above the update_ref_and_keys() call), but we didn't propagate
5347           information about this: TABLE::const_table is not set to TRUE, and
5348           Item::update_used_tables() hasn't been called for each item.
5349           The result of this is that we're missing some 'ref' accesses.
5350           TODO: OptimizerTeam: Fix this
5351         */
5352 	if (!new_fields->val->const_item())
5353 	{
5354 	  /*
5355 	    If the value matches, we can use the key reference.
5356 	    If not, we keep it until we have examined all new values
5357 	  */
5358 	  if (old->val->eq(new_fields->val, old->field->binary()))
5359 	  {
5360 	    old->level= and_level;
5361 	    old->optimize= ((old->optimize & new_fields->optimize &
5362 			     KEY_OPTIMIZE_EXISTS) |
5363 			    ((old->optimize | new_fields->optimize) &
5364 			     KEY_OPTIMIZE_REF_OR_NULL));
5365             old->null_rejecting= (old->null_rejecting &&
5366                                   new_fields->null_rejecting);
5367 	  }
5368 	}
5369 	else if (old->eq_func && new_fields->eq_func &&
5370                  old->val->eq_by_collation(new_fields->val,
5371                                            old->field->binary(),
5372                                            old->field->charset()))
5373 
5374 	{
5375 	  old->level= and_level;
5376 	  old->optimize= ((old->optimize & new_fields->optimize &
5377 			   KEY_OPTIMIZE_EXISTS) |
5378 			  ((old->optimize | new_fields->optimize) &
5379 			   KEY_OPTIMIZE_REF_OR_NULL));
5380           old->null_rejecting= (old->null_rejecting &&
5381                                 new_fields->null_rejecting);
5382 	}
5383 	else if (old->eq_func && new_fields->eq_func &&
5384 		 ((old->val->const_item() && !old->val->is_expensive() &&
5385                    old->val->is_null()) ||
5386                   (!new_fields->val->is_expensive() &&
5387                    new_fields->val->is_null())))
5388 	{
5389 	  /* field = expression OR field IS NULL */
5390 	  old->level= and_level;
5391           if (old->field->maybe_null())
5392 	  {
5393 	    old->optimize= KEY_OPTIMIZE_REF_OR_NULL;
5394             /* The referred expression can be NULL: */
5395             old->null_rejecting= 0;
5396 	  }
5397 	  /*
5398             Remember the NOT NULL value unless the value does not depend
5399             on other tables.
5400           */
5401 	  if (!old->val->used_tables() && !old->val->is_expensive() &&
5402               old->val->is_null())
5403 	    old->val= new_fields->val;
5404 	}
5405 	else
5406 	{
5407 	  /*
5408 	    We are comparing two different const.  In this case we can't
5409 	    use a key-lookup on this so it's better to remove the value
5410 	    and let the range optimzier handle it
5411 	  */
5412 	  if (old == --first_free)		// If last item
5413 	    break;
5414 	  *old= *first_free;			// Remove old value
5415 	  old--;				// Retry this value
5416 	}
5417       }
5418     }
5419   }
5420   /* Remove all not used items */
5421   for (KEY_FIELD *old=start ; old != first_free ;)
5422   {
5423     if (old->level != and_level)
5424     {						// Not used in all levels
5425       if (old == --first_free)
5426 	break;
5427       *old= *first_free;			// Remove old value
5428       continue;
5429     }
5430     old++;
5431   }
5432   return first_free;
5433 }
5434 
5435 
5436 /*
5437   Given a field, return its index in semi-join's select list, or UINT_MAX
5438 
5439   DESCRIPTION
5440     Given a field, we find its table; then see if the table is within a
5441     semi-join nest and if the field was in select list of the subselect.
5442     If it was, we return field's index in the select list. The value is used
5443     by LooseScan strategy.
5444 */
5445 
get_semi_join_select_list_index(Field * field)5446 static uint get_semi_join_select_list_index(Field *field)
5447 {
5448   uint res= UINT_MAX;
5449   TABLE_LIST *emb_sj_nest;
5450   if ((emb_sj_nest= field->table->pos_in_table_list->embedding) &&
5451       emb_sj_nest->sj_on_expr)
5452   {
5453     Item_in_subselect *subq_pred= emb_sj_nest->sj_subq_pred;
5454     st_select_lex *subq_lex= subq_pred->unit->first_select();
5455     if (subq_pred->left_expr->cols() == 1)
5456     {
5457       Item *sel_item= subq_lex->ref_pointer_array[0];
5458       if (sel_item->type() == Item::FIELD_ITEM &&
5459           ((Item_field*)sel_item)->field->eq(field))
5460       {
5461         res= 0;
5462       }
5463     }
5464     else
5465     {
5466       for (uint i= 0; i < subq_pred->left_expr->cols(); i++)
5467       {
5468         Item *sel_item= subq_lex->ref_pointer_array[i];
5469         if (sel_item->type() == Item::FIELD_ITEM &&
5470             ((Item_field*)sel_item)->field->eq(field))
5471         {
5472           res= i;
5473           break;
5474         }
5475       }
5476     }
5477   }
5478   return res;
5479 }
5480 
5481 
5482 /**
5483   Add a possible key to array of possible keys if it's usable as a key
5484 
5485     @param key_fields      Pointer to add key, if usable
5486     @param and_level       And level, to be stored in KEY_FIELD
5487     @param cond            Condition predicate
5488     @param field           Field used in comparision
5489     @param eq_func         True if we used =, <=> or IS NULL
5490     @param value           Value used for comparison with field
5491     @param num_values      Number of values[] that we are comparing against
5492     @param usable_tables   Tables which can be used for key optimization
5493     @param sargables       IN/OUT Array of found sargable candidates
5494     @param row_col_no      if = n that > 0 then field is compared only
5495                            against the n-th component of row values
5496 
5497   @note
5498     If we are doing a NOT NULL comparison on a NOT NULL field in a outer join
5499     table, we store this to be able to do not exists optimization later.
5500 
5501   @returns
5502     *key_fields is incremented if we stored a key in the array
5503 */
5504 
5505 static void
add_key_field(JOIN * join,KEY_FIELD ** key_fields,uint and_level,Item_bool_func * cond,Field * field,bool eq_func,Item ** value,uint num_values,table_map usable_tables,SARGABLE_PARAM ** sargables,uint row_col_no=0)5506 add_key_field(JOIN *join,
5507               KEY_FIELD **key_fields,uint and_level, Item_bool_func *cond,
5508               Field *field, bool eq_func, Item **value, uint num_values,
5509               table_map usable_tables, SARGABLE_PARAM **sargables,
5510               uint row_col_no= 0)
5511 {
5512   uint optimize= 0;
5513   if (eq_func &&
5514       ((join->is_allowed_hash_join_access() &&
5515         field->hash_join_is_possible() &&
5516         !(field->table->pos_in_table_list->is_materialized_derived() &&
5517           field->table->is_created())) ||
5518        (field->table->pos_in_table_list->is_materialized_derived() &&
5519         !field->table->is_created() && !(field->flags & BLOB_FLAG))))
5520   {
5521     optimize= KEY_OPTIMIZE_EQ;
5522   }
5523   else if (!(field->flags & PART_KEY_FLAG))
5524   {
5525     // Don't remove column IS NULL on a LEFT JOIN table
5526     if (eq_func && (*value)->type() == Item::NULL_ITEM &&
5527         field->table->maybe_null && !field->null_ptr)
5528     {
5529       optimize= KEY_OPTIMIZE_EXISTS;
5530       DBUG_ASSERT(num_values == 1);
5531     }
5532   }
5533   if (optimize != KEY_OPTIMIZE_EXISTS)
5534   {
5535     table_map used_tables=0;
5536     bool optimizable=0;
5537     for (uint i=0; i<num_values; i++)
5538     {
5539       Item *curr_val;
5540       if (row_col_no && value[i]->real_item()->type() == Item::ROW_ITEM)
5541       {
5542         Item_row *value_tuple= (Item_row *) (value[i]->real_item());
5543         curr_val= value_tuple->element_index(row_col_no - 1);
5544       }
5545       else
5546         curr_val= value[i];
5547       table_map value_used_tables= curr_val->used_tables();
5548       used_tables|= value_used_tables;
5549       if (!(value_used_tables & (field->table->map | RAND_TABLE_BIT)))
5550         optimizable=1;
5551     }
5552     if (!optimizable)
5553       return;
5554     if (!(usable_tables & field->table->map))
5555     {
5556       if (!eq_func || (*value)->type() != Item::NULL_ITEM ||
5557           !field->table->maybe_null || field->null_ptr)
5558 	return;					// Can't use left join optimize
5559       optimize= KEY_OPTIMIZE_EXISTS;
5560     }
5561     else
5562     {
5563       JOIN_TAB *stat=field->table->reginfo.join_tab;
5564       key_map possible_keys=field->get_possible_keys();
5565       possible_keys.intersect(field->table->keys_in_use_for_query);
5566       stat[0].keys.merge(possible_keys);             // Add possible keys
5567 
5568       /*
5569 	Save the following cases:
5570 	Field op constant
5571 	Field LIKE constant where constant doesn't start with a wildcard
5572 	Field = field2 where field2 is in a different table
5573 	Field op formula
5574 	Field IS NULL
5575 	Field IS NOT NULL
5576          Field BETWEEN ...
5577          Field IN ...
5578       */
5579       if (field->flags & PART_KEY_FLAG)
5580         stat[0].key_dependent|=used_tables;
5581 
5582       bool is_const=1;
5583       for (uint i=0; i<num_values; i++)
5584       {
5585         Item *curr_val;
5586         if (row_col_no && value[i]->real_item()->type() == Item::ROW_ITEM)
5587 	{
5588           Item_row *value_tuple= (Item_row *) (value[i]->real_item());
5589           curr_val= value_tuple->element_index(row_col_no - 1);
5590         }
5591         else
5592           curr_val= value[i];
5593         if (!(is_const&= curr_val->const_item()))
5594           break;
5595       }
5596       if (is_const)
5597       {
5598         stat[0].const_keys.merge(possible_keys);
5599         bitmap_set_bit(&field->table->cond_set, field->field_index);
5600       }
5601       else if (!eq_func)
5602       {
5603         /*
5604           Save info to be able check whether this predicate can be
5605           considered as sargable for range analisis after reading const tables.
5606           We do not save info about equalities as update_const_equal_items
5607           will take care of updating info on keys from sargable equalities.
5608         */
5609         (*sargables)--;
5610         (*sargables)->field= field;
5611         (*sargables)->arg_value= value;
5612         (*sargables)->num_values= num_values;
5613       }
5614       if (!eq_func) // eq_func is NEVER true when num_values > 1
5615         return;
5616     }
5617   }
5618   /*
5619     For the moment eq_func is always true. This slot is reserved for future
5620     extensions where we want to remembers other things than just eq comparisons
5621   */
5622   DBUG_ASSERT(eq_func);
5623   /* Store possible eq field */
5624   (*key_fields)->field=		field;
5625   (*key_fields)->eq_func=	eq_func;
5626   (*key_fields)->val=		*value;
5627   (*key_fields)->cond=          cond;
5628   (*key_fields)->level=         and_level;
5629   (*key_fields)->optimize=      optimize;
5630   /*
5631     If the condition we are analyzing is NULL-rejecting and at least
5632     one side of the equalities is NULLable, mark the KEY_FIELD object as
5633     null-rejecting. This property is used by:
5634     - add_not_null_conds() to add "column IS NOT NULL" conditions
5635     - best_access_path() to produce better estimates for NULL-able unique keys.
5636   */
5637   {
5638     if ((cond->functype() == Item_func::EQ_FUNC ||
5639          cond->functype() == Item_func::MULT_EQUAL_FUNC) &&
5640         ((*value)->maybe_null || field->real_maybe_null()))
5641       (*key_fields)->null_rejecting= true;
5642     else
5643       (*key_fields)->null_rejecting= false;
5644   }
5645   (*key_fields)->cond_guard= NULL;
5646 
5647   (*key_fields)->sj_pred_no= get_semi_join_select_list_index(field);
5648   (*key_fields)++;
5649 }
5650 
5651 /**
5652   Add possible keys to array of possible keys originated from a simple
5653   predicate.
5654 
5655     @param  key_fields     Pointer to add key, if usable
5656     @param  and_level      And level, to be stored in KEY_FIELD
5657     @param  cond           Condition predicate
5658     @param  field_item     Field item used for comparison
5659     @param  eq_func        True if we used =, <=> or IS NULL
5660     @param  value          Value used for comparison with field_item
5661     @param   num_values    Number of values[] that we are comparing against
5662     @param  usable_tables  Tables which can be used for key optimization
5663     @param  sargables      IN/OUT Array of found sargable candidates
5664     @param row_col_no      if = n that > 0 then field is compared only
5665                            against the n-th component of row values
5666 
5667   @note
5668     If field items f1 and f2 belong to the same multiple equality and
5669     a key is added for f1, the the same key is added for f2.
5670 
5671   @returns
5672     *key_fields is incremented if we stored a key in the array
5673 */
5674 
5675 static void
add_key_equal_fields(JOIN * join,KEY_FIELD ** key_fields,uint and_level,Item_bool_func * cond,Item * field_item,bool eq_func,Item ** val,uint num_values,table_map usable_tables,SARGABLE_PARAM ** sargables,uint row_col_no=0)5676 add_key_equal_fields(JOIN *join, KEY_FIELD **key_fields, uint and_level,
5677                      Item_bool_func *cond, Item *field_item,
5678                      bool eq_func, Item **val,
5679                      uint num_values, table_map usable_tables,
5680                      SARGABLE_PARAM **sargables, uint row_col_no= 0)
5681 {
5682   Field *field= ((Item_field *) (field_item->real_item()))->field;
5683   add_key_field(join, key_fields, and_level, cond, field,
5684                 eq_func, val, num_values, usable_tables, sargables,
5685                 row_col_no);
5686   Item_equal *item_equal= field_item->get_item_equal();
5687   if (item_equal)
5688   {
5689     /*
5690       Add to the set of possible key values every substitution of
5691       the field for an equal field included into item_equal
5692     */
5693     Item_equal_fields_iterator it(*item_equal);
5694     while (it++)
5695     {
5696       Field *equal_field= it.get_curr_field();
5697       if (!field->eq(equal_field))
5698       {
5699         add_key_field(join, key_fields, and_level, cond, equal_field,
5700                       eq_func, val, num_values, usable_tables,
5701                       sargables, row_col_no);
5702       }
5703     }
5704   }
5705 }
5706 
5707 
5708 /**
5709   Check if an expression is a non-outer field.
5710 
5711   Checks if an expression is a field and belongs to the current select.
5712 
5713   @param   field  Item expression to check
5714 
5715   @return boolean
5716      @retval TRUE   the expression is a local field
5717      @retval FALSE  it's something else
5718 */
5719 
5720 static bool
is_local_field(Item * field)5721 is_local_field (Item *field)
5722 {
5723   return field->real_item()->type() == Item::FIELD_ITEM
5724      && !(field->used_tables() & OUTER_REF_TABLE_BIT)
5725     && !((Item_field *)field->real_item())->get_depended_from();
5726 }
5727 
5728 
5729 /*
5730   In this and other functions, and_level is a number that is ever-growing
5731   and is different for the contents of every AND or OR clause. For example,
5732   when processing clause
5733 
5734      (a AND b AND c) OR (x AND y)
5735 
5736   we'll have
5737    * KEY_FIELD elements for (a AND b AND c) are assigned and_level=1
5738    * KEY_FIELD elements for (x AND y) are assigned and_level=2
5739    * OR operation is performed, and whatever elements are left after it are
5740      assigned and_level=3.
5741 
5742   The primary reason for having and_level attribute is the OR operation which
5743   uses and_level to mark KEY_FIELDs that should get into the result of the OR
5744   operation
5745 */
5746 
5747 
5748 void
add_key_fields(JOIN * join,KEY_FIELD ** key_fields,uint * and_level,table_map usable_tables,SARGABLE_PARAM ** sargables)5749 Item_cond_and::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
5750                               uint *and_level, table_map usable_tables,
5751                               SARGABLE_PARAM **sargables)
5752 {
5753   List_iterator_fast<Item> li(*argument_list());
5754   KEY_FIELD *org_key_fields= *key_fields;
5755 
5756   Item *item;
5757   while ((item=li++))
5758     item->add_key_fields(join, key_fields, and_level, usable_tables,
5759                          sargables);
5760   for (; org_key_fields != *key_fields ; org_key_fields++)
5761     org_key_fields->level= *and_level;
5762 }
5763 
5764 
5765 void
add_key_fields(JOIN * join,KEY_FIELD ** key_fields,uint * and_level,table_map usable_tables,SARGABLE_PARAM ** sargables)5766 Item_cond::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
5767                           uint *and_level, table_map usable_tables,
5768                           SARGABLE_PARAM **sargables)
5769 {
5770   List_iterator_fast<Item> li(*argument_list());
5771   KEY_FIELD *org_key_fields= *key_fields;
5772 
5773   (*and_level)++;
5774   (li++)->add_key_fields(join, key_fields, and_level, usable_tables,
5775                          sargables);
5776   Item *item;
5777   while ((item=li++))
5778   {
5779     KEY_FIELD *start_key_fields= *key_fields;
5780     (*and_level)++;
5781     item->add_key_fields(join, key_fields, and_level, usable_tables,
5782                          sargables);
5783     *key_fields= merge_key_fields(org_key_fields,start_key_fields,
5784                                   *key_fields, ++(*and_level));
5785   }
5786 }
5787 
5788 
5789 void
add_key_fields(JOIN * join,KEY_FIELD ** key_fields,uint * and_level,table_map usable_tables,SARGABLE_PARAM ** sargables)5790 Item_func_trig_cond::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
5791                                     uint *and_level, table_map usable_tables,
5792                                     SARGABLE_PARAM **sargables)
5793 {
5794   /*
5795     Subquery optimization: Conditions that are pushed down into subqueries
5796     are wrapped into Item_func_trig_cond. We process the wrapped condition
5797     but need to set cond_guard for KEYUSE elements generated from it.
5798   */
5799   if (!join->group_list && !join->order &&
5800       join->unit->item &&
5801       join->unit->item->substype() == Item_subselect::IN_SUBS &&
5802       !join->unit->is_unit_op())
5803   {
5804     KEY_FIELD *save= *key_fields;
5805     args[0]->add_key_fields(join, key_fields, and_level, usable_tables,
5806                             sargables);
5807     // Indicate that this ref access candidate is for subquery lookup:
5808     for (; save != *key_fields; save++)
5809       save->cond_guard= get_trig_var();
5810   }
5811 }
5812 
5813 
5814 void
add_key_fields(JOIN * join,KEY_FIELD ** key_fields,uint * and_level,table_map usable_tables,SARGABLE_PARAM ** sargables)5815 Item_func_between::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
5816                                   uint *and_level, table_map usable_tables,
5817                                   SARGABLE_PARAM **sargables)
5818 {
5819   /*
5820     Build list of possible keys for 'a BETWEEN low AND high'.
5821     It is handled similar to the equivalent condition
5822     'a >= low AND a <= high':
5823   */
5824   Item_field *field_item;
5825   bool equal_func= false;
5826   uint num_values= 2;
5827 
5828   bool binary_cmp= (args[0]->real_item()->type() == Item::FIELD_ITEM)
5829         ? ((Item_field*) args[0]->real_item())->field->binary()
5830         : true;
5831   /*
5832     Additional optimization: If 'low = high':
5833     Handle as if the condition was "t.key = low".
5834   */
5835   if (!negated && args[1]->eq(args[2], binary_cmp))
5836   {
5837     equal_func= true;
5838     num_values= 1;
5839   }
5840 
5841   /*
5842     Append keys for 'field <cmp> value[]' if the
5843     condition is of the form::
5844     '<field> BETWEEN value[1] AND value[2]'
5845   */
5846   if (is_local_field(args[0]))
5847   {
5848     field_item= (Item_field *) (args[0]->real_item());
5849     add_key_equal_fields(join, key_fields, *and_level, this,
5850                          field_item, equal_func, &args[1],
5851                          num_values, usable_tables, sargables);
5852   }
5853   /*
5854     Append keys for 'value[0] <cmp> field' if the
5855     condition is of the form:
5856     'value[0] BETWEEN field1 AND field2'
5857   */
5858   for (uint i= 1; i <= num_values; i++)
5859   {
5860     if (is_local_field(args[i]))
5861     {
5862       field_item= (Item_field *) (args[i]->real_item());
5863       add_key_equal_fields(join, key_fields, *and_level, this,
5864                            field_item, equal_func, args,
5865                            1, usable_tables, sargables);
5866     }
5867   }
5868 }
5869 
5870 
5871 void
add_key_fields(JOIN * join,KEY_FIELD ** key_fields,uint * and_level,table_map usable_tables,SARGABLE_PARAM ** sargables)5872 Item_func_in::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
5873                              uint *and_level, table_map usable_tables,
5874                              SARGABLE_PARAM **sargables)
5875 {
5876   if (is_local_field(args[0]) && !(used_tables() & OUTER_REF_TABLE_BIT))
5877   {
5878     DBUG_ASSERT(arg_count != 2);
5879     add_key_equal_fields(join, key_fields, *and_level, this,
5880                          (Item_field*) (args[0]->real_item()), false,
5881                          args + 1, arg_count - 1, usable_tables, sargables);
5882   }
5883   else if (key_item()->type() == Item::ROW_ITEM &&
5884            !(used_tables() & OUTER_REF_TABLE_BIT))
5885   {
5886     Item_row *key_row= (Item_row *) key_item();
5887     Item **key_col= key_row->addr(0);
5888     uint row_cols= key_row->cols();
5889     for (uint i= 0; i < row_cols; i++, key_col++)
5890     {
5891       if (is_local_field(*key_col))
5892       {
5893         Item_field *field_item= (Item_field *)((*key_col)->real_item());
5894         add_key_equal_fields(join, key_fields, *and_level, this,
5895                              field_item, false, args + 1, arg_count - 1,
5896                              usable_tables, sargables, i + 1);
5897       }
5898     }
5899   }
5900 
5901 }
5902 
5903 
5904 void
add_key_fields(JOIN * join,KEY_FIELD ** key_fields,uint * and_level,table_map usable_tables,SARGABLE_PARAM ** sargables)5905 Item_func_ne::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
5906                              uint *and_level, table_map usable_tables,
5907                              SARGABLE_PARAM **sargables)
5908 {
5909   if (!(used_tables() & OUTER_REF_TABLE_BIT))
5910   {
5911     /*
5912       QQ: perhaps test for !is_local_field(args[1]) is not really needed here.
5913       Other comparison functions, e.g. Item_func_le, Item_func_gt, etc,
5914       do not have this test. See Item_bool_func2::add_key_fieldoptimize_op().
5915       Check with the optimizer team.
5916     */
5917     if (is_local_field(args[0]) && !is_local_field(args[1]))
5918       add_key_equal_fields(join, key_fields, *and_level, this,
5919                            (Item_field*) (args[0]->real_item()), false,
5920                            &args[1], 1, usable_tables, sargables);
5921     /*
5922       QQ: perhaps test for !is_local_field(args[0]) is not really needed here.
5923     */
5924     if (is_local_field(args[1]) && !is_local_field(args[0]))
5925       add_key_equal_fields(join, key_fields, *and_level, this,
5926                            (Item_field*) (args[1]->real_item()), false,
5927                            &args[0], 1, usable_tables, sargables);
5928   }
5929 }
5930 
5931 
5932 void
add_key_fields(JOIN * join,KEY_FIELD ** key_fields,uint * and_level,table_map usable_tables,SARGABLE_PARAM ** sargables)5933 Item_func_like::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
5934                                uint *and_level, table_map usable_tables,
5935                                SARGABLE_PARAM **sargables)
5936 {
5937   if (is_local_field(args[0]) && with_sargable_pattern())
5938   {
5939     /*
5940       SELECT * FROM t1 WHERE field LIKE const_pattern
5941       const_pattern starts with a non-wildcard character
5942     */
5943     add_key_equal_fields(join, key_fields, *and_level, this,
5944                          (Item_field*) args[0]->real_item(), false,
5945                          args + 1, 1, usable_tables, sargables);
5946   }
5947 }
5948 
5949 
5950 void
add_key_fields_optimize_op(JOIN * join,KEY_FIELD ** key_fields,uint * and_level,table_map usable_tables,SARGABLE_PARAM ** sargables,bool equal_func)5951 Item_bool_func2::add_key_fields_optimize_op(JOIN *join, KEY_FIELD **key_fields,
5952                                             uint *and_level,
5953                                             table_map usable_tables,
5954                                             SARGABLE_PARAM **sargables,
5955                                             bool equal_func)
5956 {
5957   /* If item is of type 'field op field/constant' add it to key_fields */
5958   if (is_local_field(args[0]))
5959   {
5960     add_key_equal_fields(join, key_fields, *and_level, this,
5961                          (Item_field*) args[0]->real_item(), equal_func,
5962                          args + 1, 1, usable_tables, sargables);
5963   }
5964   if (is_local_field(args[1]))
5965   {
5966     add_key_equal_fields(join, key_fields, *and_level, this,
5967                          (Item_field*) args[1]->real_item(), equal_func,
5968                          args, 1, usable_tables, sargables);
5969   }
5970 }
5971 
5972 
5973 void
add_key_fields(JOIN * join,KEY_FIELD ** key_fields,uint * and_level,table_map usable_tables,SARGABLE_PARAM ** sargables)5974 Item_func_null_predicate::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
5975                                          uint *and_level,
5976                                          table_map usable_tables,
5977                                          SARGABLE_PARAM **sargables)
5978 {
5979   /* column_name IS [NOT] NULL */
5980   if (is_local_field(args[0]) && !(used_tables() & OUTER_REF_TABLE_BIT))
5981   {
5982     Item *tmp= new (join->thd->mem_root) Item_null(join->thd);
5983     if (unlikely(!tmp))                       // Should never be true
5984       return;
5985     add_key_equal_fields(join, key_fields, *and_level, this,
5986                          (Item_field*) args[0]->real_item(),
5987                          functype() == Item_func::ISNULL_FUNC,
5988                          &tmp, 1, usable_tables, sargables);
5989   }
5990 }
5991 
5992 
5993 void
add_key_fields(JOIN * join,KEY_FIELD ** key_fields,uint * and_level,table_map usable_tables,SARGABLE_PARAM ** sargables)5994 Item_equal::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
5995                            uint *and_level, table_map usable_tables,
5996                            SARGABLE_PARAM **sargables)
5997 {
5998   Item *const_item2= get_const();
5999   Item_equal_fields_iterator it(*this);
6000   if (const_item2)
6001   {
6002 
6003     /*
6004       For each field field1 from item_equal consider the equality
6005       field1=const_item as a condition allowing an index access of the table
6006       with field1 by the keys value of field1.
6007     */
6008     while (it++)
6009     {
6010       Field *equal_field= it.get_curr_field();
6011       add_key_field(join, key_fields, *and_level, this, equal_field,
6012                     TRUE, &const_item2, 1, usable_tables, sargables);
6013     }
6014   }
6015   else
6016   {
6017     /*
6018       Consider all pairs of different fields included into item_equal.
6019       For each of them (field1, field1) consider the equality
6020       field1=field2 as a condition allowing an index access of the table
6021       with field1 by the keys value of field2.
6022     */
6023     Item_equal_fields_iterator fi(*this);
6024     while (fi++)
6025     {
6026       Field *field= fi.get_curr_field();
6027       Item *item;
6028       while ((item= it++))
6029       {
6030         Field *equal_field= it.get_curr_field();
6031         if (!field->eq(equal_field))
6032         {
6033           add_key_field(join, key_fields, *and_level, this, field,
6034                         TRUE, &item, 1, usable_tables,
6035                         sargables);
6036         }
6037       }
6038       it.rewind();
6039     }
6040   }
6041 }
6042 
6043 
6044 static uint
max_part_bit(key_part_map bits)6045 max_part_bit(key_part_map bits)
6046 {
6047   uint found;
6048   for (found=0; bits & 1 ; found++,bits>>=1) ;
6049   return found;
6050 }
6051 
6052 
6053 /**
6054   Add a new keuse to the specified array of KEYUSE objects
6055 
6056   @param[in,out]  keyuse_array  array of keyuses to be extended
6057   @param[in]      key_field     info on the key use occurrence
6058   @param[in]      key           key number for the keyse to be added
6059   @param[in]      part          key part for the keyuse to be added
6060 
6061   @note
6062   The function builds a new KEYUSE object for a key use utilizing the info
6063   on the left and right parts of the given key use  extracted from the
6064   structure key_field, the key number and key part for this key use.
6065   The built object is added to the dynamic array keyuse_array.
6066 
6067   @retval         0             the built object is succesfully added
6068   @retval         1             otherwise
6069 */
6070 
6071 static bool
add_keyuse(DYNAMIC_ARRAY * keyuse_array,KEY_FIELD * key_field,uint key,uint part)6072 add_keyuse(DYNAMIC_ARRAY *keyuse_array, KEY_FIELD *key_field,
6073           uint key, uint part)
6074 {
6075   KEYUSE keyuse;
6076   Field *field= key_field->field;
6077 
6078   keyuse.table= field->table;
6079   keyuse.val= key_field->val;
6080   keyuse.key= key;
6081   if (!is_hash_join_key_no(key))
6082   {
6083     keyuse.keypart=part;
6084     keyuse.keypart_map= (key_part_map) 1 << part;
6085   }
6086   else
6087   {
6088     keyuse.keypart= field->field_index;
6089     keyuse.keypart_map= (key_part_map) 0;
6090   }
6091   keyuse.used_tables= key_field->val->used_tables();
6092   keyuse.optimize= key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL;
6093   keyuse.ref_table_rows= 0;
6094   keyuse.null_rejecting= key_field->null_rejecting;
6095   keyuse.cond_guard= key_field->cond_guard;
6096   keyuse.sj_pred_no= key_field->sj_pred_no;
6097   keyuse.validity_ref= 0;
6098   return (insert_dynamic(keyuse_array,(uchar*) &keyuse));
6099 }
6100 
6101 
6102 /*
6103   Add all keys with uses 'field' for some keypart
6104   If field->and_level != and_level then only mark key_part as const_part
6105 
6106   RETURN
6107    0 - OK
6108    1 - Out of memory.
6109 */
6110 
6111 static bool
add_key_part(DYNAMIC_ARRAY * keyuse_array,KEY_FIELD * key_field)6112 add_key_part(DYNAMIC_ARRAY *keyuse_array, KEY_FIELD *key_field)
6113 {
6114   Field *field=key_field->field;
6115   TABLE *form= field->table;
6116 
6117   if (key_field->eq_func && !(key_field->optimize & KEY_OPTIMIZE_EXISTS))
6118   {
6119     for (uint key=0 ; key < form->s->keys ; key++)
6120     {
6121       if (!(form->keys_in_use_for_query.is_set(key)))
6122 	continue;
6123       if (form->key_info[key].flags & (HA_FULLTEXT | HA_SPATIAL))
6124 	continue;    // ToDo: ft-keys in non-ft queries.   SerG
6125 
6126       KEY *keyinfo= form->key_info+key;
6127       uint key_parts= form->actual_n_key_parts(keyinfo);
6128       for (uint part=0 ; part <  key_parts ; part++)
6129       {
6130         if (field->eq(form->key_info[key].key_part[part].field) &&
6131             field->can_optimize_keypart_ref(key_field->cond, key_field->val))
6132 	{
6133           if (add_keyuse(keyuse_array, key_field, key, part))
6134             return TRUE;
6135 	}
6136       }
6137     }
6138     if (field->hash_join_is_possible() &&
6139         (key_field->optimize & KEY_OPTIMIZE_EQ) &&
6140         key_field->val->used_tables())
6141     {
6142       if (!field->can_optimize_hash_join(key_field->cond, key_field->val))
6143         return false;
6144       if (form->is_splittable())
6145         form->add_splitting_info_for_key_field(key_field);
6146       /*
6147         If a key use is extracted from an equi-join predicate then it is
6148         added not only as a key use for every index whose component can
6149         be evalusted utilizing this key use, but also as a key use for
6150         hash join. Such key uses are marked with a special key number.
6151       */
6152       if (add_keyuse(keyuse_array, key_field, get_hash_join_key_no(), 0))
6153         return TRUE;
6154     }
6155   }
6156   return FALSE;
6157 }
6158 
6159 static bool
add_ft_keys(DYNAMIC_ARRAY * keyuse_array,JOIN_TAB * stat,COND * cond,table_map usable_tables)6160 add_ft_keys(DYNAMIC_ARRAY *keyuse_array,
6161             JOIN_TAB *stat,COND *cond,table_map usable_tables)
6162 {
6163   Item_func_match *cond_func=NULL;
6164 
6165   if (!cond)
6166     return FALSE;
6167 
6168   if (cond->type() == Item::FUNC_ITEM)
6169   {
6170     Item_func *func=(Item_func *)cond;
6171     Item_func::Functype functype=  func->functype();
6172     if (functype == Item_func::FT_FUNC)
6173       cond_func=(Item_func_match *)cond;
6174     else if (func->argument_count() == 2)
6175     {
6176       Item *arg0=(Item *)(func->arguments()[0]),
6177            *arg1=(Item *)(func->arguments()[1]);
6178       if (arg1->const_item() && arg1->cols() == 1 &&
6179            arg0->type() == Item::FUNC_ITEM &&
6180            ((Item_func *) arg0)->functype() == Item_func::FT_FUNC &&
6181           ((functype == Item_func::GE_FUNC && arg1->val_real() > 0) ||
6182            (functype == Item_func::GT_FUNC && arg1->val_real() >=0)))
6183         cond_func= (Item_func_match *) arg0;
6184       else if (arg0->const_item() && arg0->cols() == 1 &&
6185                 arg1->type() == Item::FUNC_ITEM &&
6186                 ((Item_func *) arg1)->functype() == Item_func::FT_FUNC &&
6187                ((functype == Item_func::LE_FUNC && arg0->val_real() > 0) ||
6188                 (functype == Item_func::LT_FUNC && arg0->val_real() >=0)))
6189         cond_func= (Item_func_match *) arg1;
6190     }
6191   }
6192   else if (cond->type() == Item::COND_ITEM)
6193   {
6194     List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
6195 
6196     if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
6197     {
6198       Item *item;
6199       while ((item=li++))
6200       {
6201         if (add_ft_keys(keyuse_array,stat,item,usable_tables))
6202           return TRUE;
6203       }
6204     }
6205   }
6206 
6207   if (!cond_func || cond_func->key == NO_SUCH_KEY ||
6208       !(usable_tables & cond_func->table->map))
6209     return FALSE;
6210 
6211   KEYUSE keyuse;
6212   keyuse.table= cond_func->table;
6213   keyuse.val =  cond_func;
6214   keyuse.key =  cond_func->key;
6215   keyuse.keypart= FT_KEYPART;
6216   keyuse.used_tables=cond_func->key_item()->used_tables();
6217   keyuse.optimize= 0;
6218   keyuse.ref_table_rows= 0;
6219   keyuse.keypart_map= 0;
6220   keyuse.sj_pred_no= UINT_MAX;
6221   keyuse.validity_ref= 0;
6222   keyuse.null_rejecting= FALSE;
6223   return insert_dynamic(keyuse_array,(uchar*) &keyuse);
6224 }
6225 
6226 
6227 static int
sort_keyuse(KEYUSE * a,KEYUSE * b)6228 sort_keyuse(KEYUSE *a,KEYUSE *b)
6229 {
6230   int res;
6231   if (a->table->tablenr != b->table->tablenr)
6232     return (int) (a->table->tablenr - b->table->tablenr);
6233   if (a->key != b->key)
6234     return (int) (a->key - b->key);
6235   if (a->key == MAX_KEY && b->key == MAX_KEY &&
6236       a->used_tables != b->used_tables)
6237     return (int) ((ulong) a->used_tables - (ulong) b->used_tables);
6238   if (a->keypart != b->keypart)
6239     return (int) (a->keypart - b->keypart);
6240   // Place const values before other ones
6241   if ((res= MY_TEST((a->used_tables & ~OUTER_REF_TABLE_BIT)) -
6242        MY_TEST((b->used_tables & ~OUTER_REF_TABLE_BIT))))
6243     return res;
6244   /* Place rows that are not 'OPTIMIZE_REF_OR_NULL' first */
6245   return (int) ((a->optimize & KEY_OPTIMIZE_REF_OR_NULL) -
6246 		(b->optimize & KEY_OPTIMIZE_REF_OR_NULL));
6247 }
6248 
6249 
6250 /*
6251   Add to KEY_FIELD array all 'ref' access candidates within nested join.
6252 
6253     This function populates KEY_FIELD array with entries generated from the
6254     ON condition of the given nested join, and does the same for nested joins
6255     contained within this nested join.
6256 
6257   @param[in]      nested_join_table   Nested join pseudo-table to process
6258   @param[in,out]  end                 End of the key field array
6259   @param[in,out]  and_level           And-level
6260   @param[in,out]  sargables           Array of found sargable candidates
6261 
6262 
6263   @note
6264     We can add accesses to the tables that are direct children of this nested
6265     join (1), and are not inner tables w.r.t their neighbours (2).
6266 
6267     Example for #1 (outer brackets pair denotes nested join this function is
6268     invoked for):
6269     @code
6270      ... LEFT JOIN (t1 LEFT JOIN (t2 ... ) ) ON cond
6271     @endcode
6272     Example for #2:
6273     @code
6274      ... LEFT JOIN (t1 LEFT JOIN t2 ) ON cond
6275     @endcode
6276     In examples 1-2 for condition cond, we can add 'ref' access candidates to
6277     t1 only.
6278     Example #3:
6279     @code
6280      ... LEFT JOIN (t1, t2 LEFT JOIN t3 ON inner_cond) ON cond
6281     @endcode
6282     Here we can add 'ref' access candidates for t1 and t2, but not for t3.
6283 */
6284 
add_key_fields_for_nj(JOIN * join,TABLE_LIST * nested_join_table,KEY_FIELD ** end,uint * and_level,SARGABLE_PARAM ** sargables)6285 static void add_key_fields_for_nj(JOIN *join, TABLE_LIST *nested_join_table,
6286                                   KEY_FIELD **end, uint *and_level,
6287                                   SARGABLE_PARAM **sargables)
6288 {
6289   List_iterator<TABLE_LIST> li(nested_join_table->nested_join->join_list);
6290   List_iterator<TABLE_LIST> li2(nested_join_table->nested_join->join_list);
6291   bool have_another = FALSE;
6292   table_map tables= 0;
6293   TABLE_LIST *table;
6294   DBUG_ASSERT(nested_join_table->nested_join);
6295 
6296   while ((table= li++) || (have_another && (li=li2, have_another=FALSE,
6297                                             (table= li++))))
6298   {
6299     if (table->nested_join)
6300     {
6301       if (!table->on_expr)
6302       {
6303         /* It's a semi-join nest. Walk into it as if it wasn't a nest */
6304         have_another= TRUE;
6305         li2= li;
6306         li= List_iterator<TABLE_LIST>(table->nested_join->join_list);
6307       }
6308       else
6309         add_key_fields_for_nj(join, table, end, and_level, sargables);
6310     }
6311     else
6312       if (!table->on_expr)
6313         tables |= table->table->map;
6314   }
6315   if (nested_join_table->on_expr)
6316     nested_join_table->on_expr->add_key_fields(join, end, and_level, tables,
6317                                                sargables);
6318 }
6319 
6320 
count_cond_for_nj(SELECT_LEX * sel,TABLE_LIST * nested_join_table)6321 void count_cond_for_nj(SELECT_LEX *sel, TABLE_LIST *nested_join_table)
6322 {
6323   List_iterator<TABLE_LIST> li(nested_join_table->nested_join->join_list);
6324   List_iterator<TABLE_LIST> li2(nested_join_table->nested_join->join_list);
6325   bool have_another = FALSE;
6326   TABLE_LIST *table;
6327 
6328   while ((table= li++) || (have_another && (li=li2, have_another=FALSE,
6329                                             (table= li++))))
6330   if (table->nested_join)
6331   {
6332     if (!table->on_expr)
6333     {
6334       /* It's a semi-join nest. Walk into it as if it wasn't a nest */
6335       have_another= TRUE;
6336       li2= li;
6337       li= List_iterator<TABLE_LIST>(table->nested_join->join_list);
6338     }
6339     else
6340       count_cond_for_nj(sel, table);
6341   }
6342   if (nested_join_table->on_expr)
6343     nested_join_table->on_expr->walk(&Item::count_sargable_conds, 0, sel);
6344 
6345 }
6346 
6347 /**
6348   Update keyuse array with all possible keys we can use to fetch rows.
6349 
6350   @param       thd
6351   @param[out]  keyuse         Put here ordered array of KEYUSE structures
6352   @param       join_tab       Array in tablenr_order
6353   @param       tables         Number of tables in join
6354   @param       cond           WHERE condition (note that the function analyzes
6355                               join_tab[i]->on_expr too)
6356   @param       normal_tables  Tables not inner w.r.t some outer join (ones
6357                               for which we can make ref access based the WHERE
6358                               clause)
6359   @param       select_lex     current SELECT
6360   @param[out]  sargables      Array of found sargable candidates
6361 
6362    @retval
6363      0  OK
6364    @retval
6365      1  Out of memory.
6366 */
6367 
6368 static bool
update_ref_and_keys(THD * thd,DYNAMIC_ARRAY * keyuse,JOIN_TAB * join_tab,uint tables,COND * cond,table_map normal_tables,SELECT_LEX * select_lex,SARGABLE_PARAM ** sargables)6369 update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
6370                     uint tables, COND *cond, table_map normal_tables,
6371                     SELECT_LEX *select_lex, SARGABLE_PARAM **sargables)
6372 {
6373   uint	and_level,i;
6374   KEY_FIELD *key_fields, *end, *field;
6375   uint sz;
6376   uint m= MY_MAX(select_lex->max_equal_elems,1);
6377   DBUG_ENTER("update_ref_and_keys");
6378   DBUG_PRINT("enter", ("normal_tables: %llx", normal_tables));
6379 
6380   SELECT_LEX *sel=thd->lex->current_select;
6381   sel->cond_count= 0;
6382   sel->between_count= 0;
6383   if (cond)
6384     cond->walk(&Item::count_sargable_conds, 0, sel);
6385   for (i=0 ; i < tables ; i++)
6386   {
6387     if (*join_tab[i].on_expr_ref)
6388       (*join_tab[i].on_expr_ref)->walk(&Item::count_sargable_conds, 0, sel);
6389   }
6390   {
6391     List_iterator<TABLE_LIST> li(*join_tab->join->join_list);
6392     TABLE_LIST *table;
6393     while ((table= li++))
6394     {
6395       if (table->nested_join)
6396         count_cond_for_nj(sel, table);
6397     }
6398   }
6399 
6400   /*
6401     We use the same piece of memory to store both  KEY_FIELD
6402     and SARGABLE_PARAM structure.
6403     KEY_FIELD values are placed at the beginning this memory
6404     while  SARGABLE_PARAM values are put at the end.
6405     All predicates that are used to fill arrays of KEY_FIELD
6406     and SARGABLE_PARAM structures have at most 2 arguments
6407     except BETWEEN predicates that have 3 arguments and
6408     IN predicates.
6409     This any predicate if it's not BETWEEN/IN can be used
6410     directly to fill at most 2 array elements, either of KEY_FIELD
6411     or SARGABLE_PARAM type. For a BETWEEN predicate 3 elements
6412     can be filled as this predicate is considered as
6413     saragable with respect to each of its argument.
6414     An IN predicate can require at most 1 element as currently
6415     it is considered as sargable only for its first argument.
6416     Multiple equality can add  elements that are filled after
6417     substitution of field arguments by equal fields. There
6418     can be not more than select_lex->max_equal_elems such
6419     substitutions.
6420   */
6421   sz= MY_MAX(sizeof(KEY_FIELD),sizeof(SARGABLE_PARAM))*
6422     ((sel->cond_count*2 + sel->between_count)*m+1);
6423   if (!(key_fields=(KEY_FIELD*)	thd->alloc(sz)))
6424     DBUG_RETURN(TRUE); /* purecov: inspected */
6425   and_level= 0;
6426   field= end= key_fields;
6427   *sargables= (SARGABLE_PARAM *) key_fields +
6428                 (sz - sizeof((*sargables)[0].field))/sizeof(SARGABLE_PARAM);
6429   /* set a barrier for the array of SARGABLE_PARAM */
6430   (*sargables)[0].field= 0;
6431 
6432   if (my_init_dynamic_array2(keyuse, sizeof(KEYUSE),
6433                              thd->alloc(sizeof(KEYUSE) * 20), 20, 64,
6434                              MYF(MY_THREAD_SPECIFIC)))
6435     DBUG_RETURN(TRUE);
6436 
6437   if (cond)
6438   {
6439     KEY_FIELD *saved_field= field;
6440     cond->add_key_fields(join_tab->join, &end, &and_level, normal_tables,
6441                          sargables);
6442     for (; field != end ; field++)
6443     {
6444 
6445       /* Mark that we can optimize LEFT JOIN */
6446       if (field->val->type() == Item::NULL_ITEM &&
6447 	  !field->field->real_maybe_null())
6448 	field->field->table->reginfo.not_exists_optimize=1;
6449     }
6450     field= saved_field;
6451   }
6452   for (i=0 ; i < tables ; i++)
6453   {
6454     /*
6455       Block the creation of keys for inner tables of outer joins.
6456       Here only the outer joins that can not be converted to
6457       inner joins are left and all nests that can be eliminated
6458       are flattened.
6459       In the future when we introduce conditional accesses
6460       for inner tables in outer joins these keys will be taken
6461       into account as well.
6462     */
6463     if (*join_tab[i].on_expr_ref)
6464       (*join_tab[i].on_expr_ref)->add_key_fields(join_tab->join, &end,
6465                                                  &and_level,
6466                                                  join_tab[i].table->map,
6467                                                  sargables);
6468   }
6469 
6470   /* Process ON conditions for the nested joins */
6471   {
6472     List_iterator<TABLE_LIST> li(*join_tab->join->join_list);
6473     TABLE_LIST *table;
6474     while ((table= li++))
6475     {
6476       if (table->nested_join)
6477         add_key_fields_for_nj(join_tab->join, table, &end, &and_level,
6478                               sargables);
6479     }
6480   }
6481 
6482   /* fill keyuse with found key parts */
6483   for ( ; field != end ; field++)
6484   {
6485     if (add_key_part(keyuse,field))
6486       DBUG_RETURN(TRUE);
6487   }
6488 
6489   if (select_lex->ftfunc_list->elements)
6490   {
6491     if (add_ft_keys(keyuse,join_tab,cond,normal_tables))
6492       DBUG_RETURN(TRUE);
6493   }
6494 
6495   DBUG_RETURN(FALSE);
6496 }
6497 
6498 
6499 /**
6500   Sort the array of possible keys and remove the following key parts:
6501   - ref if there is a keypart which is a ref and a const.
6502     (e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d,
6503     then we skip the key part corresponding to b=t2.d)
6504   - keyparts without previous keyparts
6505     (e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is
6506     used in the query, we drop the partial key parts from consideration).
6507   Special treatment for ft-keys.
6508 */
6509 
sort_and_filter_keyuse(THD * thd,DYNAMIC_ARRAY * keyuse,bool skip_unprefixed_keyparts)6510 bool sort_and_filter_keyuse(THD *thd, DYNAMIC_ARRAY *keyuse,
6511                             bool skip_unprefixed_keyparts)
6512 {
6513   KEYUSE key_end, *prev, *save_pos, *use;
6514   uint found_eq_constant, i;
6515 
6516   DBUG_ASSERT(keyuse->elements);
6517 
6518   my_qsort(keyuse->buffer, keyuse->elements, sizeof(KEYUSE),
6519            (qsort_cmp) sort_keyuse);
6520 
6521   bzero((char*) &key_end, sizeof(key_end));    /* Add for easy testing */
6522   if (insert_dynamic(keyuse, (uchar*) &key_end))
6523     return TRUE;
6524 
6525   if (optimizer_flag(thd, OPTIMIZER_SWITCH_DERIVED_WITH_KEYS))
6526     generate_derived_keys(keyuse);
6527 
6528   use= save_pos= dynamic_element(keyuse,0,KEYUSE*);
6529   prev= &key_end;
6530   found_eq_constant= 0;
6531   for (i=0 ; i < keyuse->elements-1 ; i++,use++)
6532   {
6533     if (!use->is_for_hash_join())
6534     {
6535       if (!(use->used_tables & ~OUTER_REF_TABLE_BIT) &&
6536           use->optimize != KEY_OPTIMIZE_REF_OR_NULL)
6537         use->table->const_key_parts[use->key]|= use->keypart_map;
6538       if (use->keypart != FT_KEYPART)
6539       {
6540         if (use->key == prev->key && use->table == prev->table)
6541         {
6542           if ((prev->keypart+1 < use->keypart && skip_unprefixed_keyparts) ||
6543               (prev->keypart == use->keypart && found_eq_constant))
6544             continue;				/* remove */
6545         }
6546         else if (use->keypart != 0 && skip_unprefixed_keyparts)
6547           continue; /* remove - first found must be 0 */
6548       }
6549 
6550       prev= use;
6551       found_eq_constant= !use->used_tables;
6552       use->table->reginfo.join_tab->checked_keys.set_bit(use->key);
6553     }
6554     /*
6555       Old gcc used a memcpy(), which is undefined if save_pos==use:
6556       http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19410
6557       http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39480
6558       This also disables a valgrind warning, so better to have the test.
6559     */
6560     if (save_pos != use)
6561       *save_pos= *use;
6562     /* Save ptr to first use */
6563     if (!use->table->reginfo.join_tab->keyuse)
6564       use->table->reginfo.join_tab->keyuse= save_pos;
6565     save_pos++;
6566   }
6567   i= (uint) (save_pos-(KEYUSE*) keyuse->buffer);
6568   (void) set_dynamic(keyuse,(uchar*) &key_end,i);
6569   keyuse->elements= i;
6570 
6571   return FALSE;
6572 }
6573 
6574 
6575 /**
6576   Update some values in keyuse for faster choose_plan() loop.
6577 */
6578 
optimize_keyuse(JOIN * join,DYNAMIC_ARRAY * keyuse_array)6579 void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array)
6580 {
6581   KEYUSE *end,*keyuse= dynamic_element(keyuse_array, 0, KEYUSE*);
6582 
6583   for (end= keyuse+ keyuse_array->elements ; keyuse < end ; keyuse++)
6584   {
6585     table_map map;
6586     /*
6587       If we find a ref, assume this table matches a proportional
6588       part of this table.
6589       For example 100 records matching a table with 5000 records
6590       gives 5000/100 = 50 records per key
6591       Constant tables are ignored.
6592       To avoid bad matches, we don't make ref_table_rows less than 100.
6593     */
6594     keyuse->ref_table_rows= ~(ha_rows) 0;	// If no ref
6595     if (keyuse->used_tables &
6596 	(map= (keyuse->used_tables & ~join->const_table_map &
6597 	       ~OUTER_REF_TABLE_BIT)))
6598     {
6599       uint n_tables= my_count_bits(map);
6600       if (n_tables == 1)			// Only one table
6601       {
6602         DBUG_ASSERT(!(map & PSEUDO_TABLE_BITS)); // Must be a real table
6603         Table_map_iterator it(map);
6604         int tablenr= it.next_bit();
6605         DBUG_ASSERT(tablenr != Table_map_iterator::BITMAP_END);
6606 	TABLE *tmp_table=join->table[tablenr];
6607         if (tmp_table) // already created
6608           keyuse->ref_table_rows= MY_MAX(tmp_table->file->stats.records, 100);
6609       }
6610     }
6611     /*
6612       Outer reference (external field) is constant for single executing
6613       of subquery
6614     */
6615     if (keyuse->used_tables == OUTER_REF_TABLE_BIT)
6616       keyuse->ref_table_rows= 1;
6617   }
6618 }
6619 
6620 
6621 /**
6622   Check for the presence of AGGFN(DISTINCT a) queries that may be subject
6623   to loose index scan.
6624 
6625 
6626   Check if the query is a subject to AGGFN(DISTINCT) using loose index scan
6627   (QUICK_GROUP_MIN_MAX_SELECT).
6628   Optionally (if out_args is supplied) will push the arguments of
6629   AGGFN(DISTINCT) to the list
6630 
6631   Check for every COUNT(DISTINCT), AVG(DISTINCT) or
6632   SUM(DISTINCT). These can be resolved by Loose Index Scan as long
6633   as all the aggregate distinct functions refer to the same
6634   fields. Thus:
6635 
6636   SELECT AGGFN(DISTINCT a, b), AGGFN(DISTINCT b, a)... => can use LIS
6637   SELECT AGGFN(DISTINCT a),    AGGFN(DISTINCT a)   ... => can use LIS
6638   SELECT AGGFN(DISTINCT a, b), AGGFN(DISTINCT a)   ... => cannot use LIS
6639   SELECT AGGFN(DISTINCT a),    AGGFN(DISTINCT b)   ... => cannot use LIS
6640   etc.
6641 
6642   @param      join       the join to check
6643   @param[out] out_args   Collect the arguments of the aggregate functions
6644                          to a list. We don't worry about duplicates as
6645                          these will be sorted out later in
6646                          get_best_group_min_max.
6647 
6648   @return                does the query qualify for indexed AGGFN(DISTINCT)
6649     @retval   true       it does
6650     @retval   false      AGGFN(DISTINCT) must apply distinct in it.
6651 */
6652 
6653 bool
is_indexed_agg_distinct(JOIN * join,List<Item_field> * out_args)6654 is_indexed_agg_distinct(JOIN *join, List<Item_field> *out_args)
6655 {
6656   Item_sum **sum_item_ptr;
6657   bool result= false;
6658   Field_map first_aggdistinct_fields;
6659 
6660   if (join->table_count != 1 ||                    /* reference more than 1 table */
6661       join->select_distinct ||                /* or a DISTINCT */
6662       join->select_lex->olap == ROLLUP_TYPE)  /* Check (B3) for ROLLUP */
6663     return false;
6664 
6665   if (join->make_sum_func_list(join->all_fields, join->fields_list, true))
6666     return false;
6667 
6668   for (sum_item_ptr= join->sum_funcs; *sum_item_ptr; sum_item_ptr++)
6669   {
6670     Item_sum *sum_item= *sum_item_ptr;
6671     Field_map cur_aggdistinct_fields;
6672     Item *expr;
6673     /* aggregate is not AGGFN(DISTINCT) or more than 1 argument to it */
6674     switch (sum_item->sum_func())
6675     {
6676       case Item_sum::MIN_FUNC:
6677       case Item_sum::MAX_FUNC:
6678         continue;
6679       case Item_sum::COUNT_DISTINCT_FUNC:
6680         break;
6681       case Item_sum::AVG_DISTINCT_FUNC:
6682       case Item_sum::SUM_DISTINCT_FUNC:
6683         if (sum_item->get_arg_count() == 1)
6684           break;
6685         /* fall through */
6686       default: return false;
6687     }
6688     /*
6689       We arrive here for every COUNT(DISTINCT),AVG(DISTINCT) or SUM(DISTINCT).
6690       Collect the arguments of the aggregate functions to a list.
6691       We don't worry about duplicates as these will be sorted out later in
6692       get_best_group_min_max
6693     */
6694     for (uint i= 0; i < sum_item->get_arg_count(); i++)
6695     {
6696       expr= sum_item->get_arg(i);
6697       /* The AGGFN(DISTINCT) arg is not an attribute? */
6698       if (expr->real_item()->type() != Item::FIELD_ITEM)
6699         return false;
6700 
6701       Item_field* item= static_cast<Item_field*>(expr->real_item());
6702       if (out_args)
6703         out_args->push_back(item, join->thd->mem_root);
6704 
6705       cur_aggdistinct_fields.set_bit(item->field->field_index);
6706       result= true;
6707     }
6708     /*
6709       If there are multiple aggregate functions, make sure that they all
6710       refer to exactly the same set of columns.
6711     */
6712     if (first_aggdistinct_fields.is_clear_all())
6713       first_aggdistinct_fields.merge(cur_aggdistinct_fields);
6714     else if (first_aggdistinct_fields != cur_aggdistinct_fields)
6715       return false;
6716   }
6717 
6718   return result;
6719 }
6720 
6721 
6722 /**
6723   Discover the indexes that can be used for GROUP BY or DISTINCT queries.
6724 
6725   If the query has a GROUP BY clause, find all indexes that contain all
6726   GROUP BY fields, and add those indexes to join->const_keys.
6727 
6728   If the query has a DISTINCT clause, find all indexes that contain all
6729   SELECT fields, and add those indexes to join->const_keys.
6730   This allows later on such queries to be processed by a
6731   QUICK_GROUP_MIN_MAX_SELECT.
6732 
6733   @param join
6734   @param join_tab
6735 
6736   @return
6737     None
6738 */
6739 
6740 static void
add_group_and_distinct_keys(JOIN * join,JOIN_TAB * join_tab)6741 add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab)
6742 {
6743   List<Item_field> indexed_fields;
6744   List_iterator<Item_field> indexed_fields_it(indexed_fields);
6745   ORDER      *cur_group;
6746   Item_field *cur_item;
6747   key_map possible_keys(0);
6748 
6749   if (join->group_list)
6750   { /* Collect all query fields referenced in the GROUP clause. */
6751     for (cur_group= join->group_list; cur_group; cur_group= cur_group->next)
6752       (*cur_group->item)->walk(&Item::collect_item_field_processor, 0,
6753                                &indexed_fields);
6754   }
6755   else if (join->select_distinct)
6756   { /* Collect all query fields referenced in the SELECT clause. */
6757     List<Item> &select_items= join->fields_list;
6758     List_iterator<Item> select_items_it(select_items);
6759     Item *item;
6760     while ((item= select_items_it++))
6761       item->walk(&Item::collect_item_field_processor, 0, &indexed_fields);
6762   }
6763   else if (join->tmp_table_param.sum_func_count &&
6764            is_indexed_agg_distinct(join, &indexed_fields))
6765   {
6766     join->sort_and_group= 1;
6767   }
6768   else
6769     return;
6770 
6771   if (indexed_fields.elements == 0)
6772     return;
6773 
6774   /* Intersect the keys of all group fields. */
6775   cur_item= indexed_fields_it++;
6776   possible_keys.merge(cur_item->field->part_of_key);
6777   while ((cur_item= indexed_fields_it++))
6778   {
6779     possible_keys.intersect(cur_item->field->part_of_key);
6780   }
6781 
6782   if (!possible_keys.is_clear_all())
6783     join_tab->const_keys.merge(possible_keys);
6784 }
6785 
6786 
6787 /*****************************************************************************
6788   Go through all combinations of not marked tables and find the one
6789   which uses least records
6790 *****************************************************************************/
6791 
6792 /** Save const tables first as used tables. */
6793 
set_position(JOIN * join,uint idx,JOIN_TAB * table,KEYUSE * key)6794 void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
6795 {
6796   join->positions[idx].table= table;
6797   join->positions[idx].key=key;
6798   join->positions[idx].records_read=1.0;	/* This is a const table */
6799   join->positions[idx].cond_selectivity= 1.0;
6800   join->positions[idx].ref_depend_map= 0;
6801 
6802 //  join->positions[idx].loosescan_key= MAX_KEY; /* Not a LooseScan */
6803   join->positions[idx].sj_strategy= SJ_OPT_NONE;
6804   join->positions[idx].use_join_buffer= FALSE;
6805 
6806   /* Move the const table as down as possible in best_ref */
6807   JOIN_TAB **pos=join->best_ref+idx+1;
6808   JOIN_TAB *next=join->best_ref[idx];
6809   for (;next != table ; pos++)
6810   {
6811     JOIN_TAB *tmp=pos[0];
6812     pos[0]=next;
6813     next=tmp;
6814   }
6815   join->best_ref[idx]=table;
6816   join->positions[idx].spl_plan= 0;
6817 }
6818 
6819 
6820 /*
6821   Estimate how many records we will get if we read just this table and apply
6822   a part of WHERE that can be checked for it.
6823 
6824   @detail
6825   Estimate how many records we will get if we
6826    - read the given table with its "independent" access method (either quick
6827      select or full table/index scan),
6828    - apply the part of WHERE that refers only to this table.
6829 
6830   @seealso
6831     table_cond_selectivity() produces selectivity of condition that is checked
6832     after joining rows from this table to rows from preceding tables.
6833 */
6834 
6835 inline
matching_candidates_in_table(JOIN_TAB * s,bool with_found_constraint,uint use_cond_selectivity)6836 double matching_candidates_in_table(JOIN_TAB *s, bool with_found_constraint,
6837                                      uint use_cond_selectivity)
6838 {
6839   ha_rows records;
6840   double dbl_records;
6841 
6842   if (use_cond_selectivity > 1)
6843   {
6844     TABLE *table= s->table;
6845     double sel= table->cond_selectivity;
6846     double table_records= (double)table->stat_records();
6847     dbl_records= table_records * sel;
6848     return dbl_records;
6849   }
6850 
6851   records = s->found_records;
6852 
6853   /*
6854     If there is a filtering condition on the table (i.e. ref analyzer found
6855     at least one "table.keyXpartY= exprZ", where exprZ refers only to tables
6856     preceding this table in the join order we're now considering), then
6857     assume that 25% of the rows will be filtered out by this condition.
6858 
6859     This heuristic is supposed to force tables used in exprZ to be before
6860     this table in join order.
6861   */
6862   if (with_found_constraint)
6863     records-= records/4;
6864 
6865     /*
6866       If applicable, get a more accurate estimate. Don't use the two
6867       heuristics at once.
6868     */
6869   if (s->table->quick_condition_rows != s->found_records)
6870     records= s->table->quick_condition_rows;
6871 
6872   dbl_records= (double)records;
6873   return dbl_records;
6874 }
6875 
6876 
6877 /**
6878   Find the best access path for an extension of a partial execution
6879   plan and add this path to the plan.
6880 
6881   The function finds the best access path to table 's' from the passed
6882   partial plan where an access path is the general term for any means to
6883   access the data in 's'. An access path may use either an index or a scan,
6884   whichever is cheaper. The input partial plan is passed via the array
6885   'join->positions' of length 'idx'. The chosen access method for 's' and its
6886   cost are stored in 'join->positions[idx]'.
6887 
6888   @param join             pointer to the structure providing all context info
6889                           for the query
6890   @param s                the table to be joined by the function
  @param remaining_tables set of tables not included into the partial plan yet
  @param join_positions   array describing the partial plan (of length 'idx')
6893   @param idx              the length of the partial plan
6894   @param disable_jbuf     TRUE<=> Don't use join buffering
6895   @param record_count     estimate for the number of records returned by the
6896                           partial plan
6897   @param pos              OUT Table access plan
6898   @param loose_scan_pos   OUT Table plan that uses loosescan, or set cost to
6899                               DBL_MAX if not possible.
6900 
6901   @return
6902     None
6903 */
6904 
6905 void
best_access_path(JOIN * join,JOIN_TAB * s,table_map remaining_tables,const POSITION * join_positions,uint idx,bool disable_jbuf,double record_count,POSITION * pos,POSITION * loose_scan_pos)6906 best_access_path(JOIN      *join,
6907                  JOIN_TAB  *s,
6908                  table_map remaining_tables,
6909                  const POSITION *join_positions,
6910                  uint      idx,
6911                  bool      disable_jbuf,
6912                  double    record_count,
6913                  POSITION *pos,
6914                  POSITION *loose_scan_pos)
6915 {
6916   THD *thd= join->thd;
6917   uint use_cond_selectivity= thd->variables.optimizer_use_condition_selectivity;
6918   KEYUSE *best_key=         0;
6919   uint best_max_key_part=   0;
6920   my_bool found_constraint= 0;
6921   double best=              DBL_MAX;
6922   double best_time=         DBL_MAX;
6923   double records=           DBL_MAX;
6924   table_map best_ref_depends_map= 0;
6925   double tmp;
6926   ha_rows rec;
6927   bool best_uses_jbuf= FALSE;
6928   MY_BITMAP *eq_join_set= &s->table->eq_join_set;
6929   KEYUSE *hj_start_key= 0;
6930   SplM_plan_info *spl_plan= 0;
6931 
6932   disable_jbuf= disable_jbuf || idx == join->const_tables;
6933 
6934   Loose_scan_opt loose_scan_opt;
6935   DBUG_ENTER("best_access_path");
6936 
6937   bitmap_clear_all(eq_join_set);
6938 
6939   loose_scan_opt.init(join, s, remaining_tables);
6940 
6941   if (s->table->is_splittable())
6942     spl_plan= s->choose_best_splitting(record_count, remaining_tables);
6943 
6944   if (s->keyuse)
6945   {                                            /* Use key if possible */
6946     KEYUSE *keyuse;
6947     KEYUSE *start_key=0;
6948     TABLE *table= s->table;
6949     double best_records= DBL_MAX;
6950     uint max_key_part=0;
6951 
6952     /* Test how we can use keys */
6953     rec= s->records/MATCHING_ROWS_IN_OTHER_TABLE;  // Assumed records/key
6954     for (keyuse=s->keyuse ; keyuse->table == table ;)
6955     {
6956       KEY *keyinfo;
6957       ulong key_flags;
6958       uint key_parts;
6959       key_part_map found_part= 0;
6960       key_part_map notnull_part=0; // key parts which won't have NULL in lookup tuple.
6961       table_map found_ref= 0;
6962       uint key= keyuse->key;
6963       bool ft_key=  (keyuse->keypart == FT_KEYPART);
6964       /* Bitmap of keyparts where the ref access is over 'keypart=const': */
6965       key_part_map const_part= 0;
6966       /* The or-null keypart in ref-or-null access: */
6967       key_part_map ref_or_null_part= 0;
6968       if (is_hash_join_key_no(key))
6969       {
6970         /*
6971           Hash join as any join employing join buffer can be used to join
6972           only those tables that are joined after the first non const table
6973 	*/
6974         if (!(remaining_tables & keyuse->used_tables) &&
6975             idx > join->const_tables)
6976         {
6977           if (!hj_start_key)
6978             hj_start_key= keyuse;
6979           bitmap_set_bit(eq_join_set, keyuse->keypart);
6980         }
6981         keyuse++;
6982         continue;
6983       }
6984 
6985       keyinfo= table->key_info+key;
6986       key_parts= table->actual_n_key_parts(keyinfo);
6987       key_flags= table->actual_key_flags(keyinfo);
6988 
6989       /* Calculate how many key segments of the current key we can use */
6990       start_key= keyuse;
6991 
6992       loose_scan_opt.next_ref_key();
6993       DBUG_PRINT("info", ("Considering ref access on key %s",
6994                           keyuse->table->key_info[keyuse->key].name.str));
6995 
6996       do /* For each keypart */
6997       {
6998         uint keypart= keyuse->keypart;
6999         table_map best_part_found_ref= 0;
7000         double best_prev_record_reads= DBL_MAX;
7001 
7002         do /* For each way to access the keypart */
7003         {
7004           /*
7005             if 1. expression doesn't refer to forward tables
7006                2. we won't get two ref-or-null's
7007           */
7008           if (!(remaining_tables & keyuse->used_tables) &&
7009               (!keyuse->validity_ref || *keyuse->validity_ref) &&
7010               s->access_from_tables_is_allowed(keyuse->used_tables,
7011                                                join->sjm_lookup_tables) &&
7012               !(ref_or_null_part && (keyuse->optimize &
7013                                      KEY_OPTIMIZE_REF_OR_NULL)))
7014           {
7015             found_part|= keyuse->keypart_map;
7016             if (!(keyuse->used_tables & ~join->const_table_map))
7017               const_part|= keyuse->keypart_map;
7018 
7019             if (!keyuse->val->maybe_null || keyuse->null_rejecting)
7020               notnull_part|=keyuse->keypart_map;
7021 
7022             double tmp2= prev_record_reads(join_positions, idx,
7023                                            (found_ref | keyuse->used_tables));
7024             if (tmp2 < best_prev_record_reads)
7025             {
7026               best_part_found_ref= keyuse->used_tables & ~join->const_table_map;
7027               best_prev_record_reads= tmp2;
7028             }
7029             if (rec > keyuse->ref_table_rows)
7030               rec= keyuse->ref_table_rows;
7031 	    /*
7032 	      If there is one 'key_column IS NULL' expression, we can
7033 	      use this ref_or_null optimisation of this field
7034 	    */
7035             if (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)
7036               ref_or_null_part |= keyuse->keypart_map;
7037           }
7038           loose_scan_opt.add_keyuse(remaining_tables, keyuse);
7039           keyuse++;
7040         } while (keyuse->table == table && keyuse->key == key &&
7041                  keyuse->keypart == keypart);
7042 	found_ref|= best_part_found_ref;
7043       } while (keyuse->table == table && keyuse->key == key);
7044 
7045       /*
        Assume that each key matches a proportional part of table.
7047       */
7048       if (!found_part && !ft_key && !loose_scan_opt.have_a_case())
7049         continue;                               // Nothing usable found
7050 
7051       if (rec < MATCHING_ROWS_IN_OTHER_TABLE)
7052         rec= MATCHING_ROWS_IN_OTHER_TABLE;      // Fix for small tables
7053 
7054       /*
7055         ft-keys require special treatment
7056       */
7057       if (ft_key)
7058       {
7059         /*
7060           Really, there should be records=0.0 (yes!)
7061           but 1.0 would be probably safer
7062         */
7063         tmp= prev_record_reads(join_positions, idx, found_ref);
7064         records= 1.0;
7065       }
7066       else
7067       {
7068         found_constraint= MY_TEST(found_part);
7069         loose_scan_opt.check_ref_access_part1(s, key, start_key, found_part);
7070 
7071         /* Check if we found full key */
7072         const key_part_map all_key_parts= PREV_BITS(uint, key_parts);
7073         if (found_part == all_key_parts && !ref_or_null_part)
7074         {                                         /* use eq key */
7075           max_key_part= (uint) ~0;
7076           /*
7077             If the index is a unique index (1), and
7078             - all its columns are not null (2), or
7079             - equalities we are using reject NULLs (3)
7080             then the estimate is rows=1.
7081           */
7082           if ((key_flags & (HA_NOSAME | HA_EXT_NOSAME)) &&   // (1)
7083               (!(key_flags & HA_NULL_PART_KEY) ||            //  (2)
7084                all_key_parts == notnull_part))               //  (3)
7085           {
7086             tmp = prev_record_reads(join_positions, idx, found_ref);
7087             records=1.0;
7088           }
7089           else
7090           {
7091             if (!found_ref)
7092             {                                     /* We found a const key */
7093               /*
7094                 ReuseRangeEstimateForRef-1:
7095                 We get here if we've found a ref(const) (c_i are constants):
7096                   "(keypart1=c1) AND ... AND (keypartN=cN)"   [ref_const_cond]
7097 
7098                 If range optimizer was able to construct a "range"
7099                 access on this index, then its condition "quick_cond" was
                equivalent to ref_const_cond (*), and we can re-use E(#rows)
7101                 from the range optimizer.
7102 
7103                 Proof of (*): By properties of range and ref optimizers
                quick_cond will be equal or tighter than ref_const_cond.
7105                 ref_const_cond already covers "smallest" possible interval -
7106                 a singlepoint interval over all keyparts. Therefore,
7107                 quick_cond is equivalent to ref_const_cond (if it was an
7108                 empty interval we wouldn't have got here).
7109               */
7110               if (table->quick_keys.is_set(key))
7111                 records= (double) table->quick_rows[key];
7112               else
7113               {
7114                 /* quick_range couldn't use key! */
7115                 records= (double) s->records/rec;
7116               }
7117             }
7118             else
7119             {
7120               if (!(records= keyinfo->actual_rec_per_key(key_parts-1)))
7121               {                                   /* Prefer longer keys */
7122                 records=
7123                   ((double) s->records / (double) rec *
7124                    (1.0 +
7125                     ((double) (table->s->max_key_length-keyinfo->key_length) /
7126                      (double) table->s->max_key_length)));
7127                 if (records < 2.0)
7128                   records=2.0;               /* Can't be as good as a unique */
7129               }
7130               /*
7131                 ReuseRangeEstimateForRef-2:  We get here if we could not reuse
7132                 E(#rows) from range optimizer. Make another try:
7133 
7134                 If range optimizer produced E(#rows) for a prefix of the ref
7135                 access we're considering, and that E(#rows) is lower then our
7136                 current estimate, make an adjustment. The criteria of when we
7137                 can make an adjustment is a special case of the criteria used
7138                 in ReuseRangeEstimateForRef-3.
7139               */
7140               if (table->quick_keys.is_set(key) &&
7141                   (const_part &
7142                     (((key_part_map)1 << table->quick_key_parts[key])-1)) ==
7143                   (((key_part_map)1 << table->quick_key_parts[key])-1) &&
7144                   table->quick_n_ranges[key] == 1 &&
7145                   records > (double) table->quick_rows[key])
7146               {
7147                 records= (double) table->quick_rows[key];
7148               }
7149             }
7150             /* Limit the number of matched rows */
7151             tmp= records;
7152             set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
7153             if (table->covering_keys.is_set(key))
7154               tmp= table->file->keyread_time(key, 1, (ha_rows) tmp);
7155             else
7156               tmp= table->file->read_time(key, 1,
7157                                           (ha_rows) MY_MIN(tmp,s->worst_seeks));
7158             tmp= COST_MULT(tmp, record_count);
7159           }
7160         }
7161         else
7162         {
7163           /*
7164             Use as much key-parts as possible and a uniq key is better
7165             than a not unique key
7166             Set tmp to (previous record count) * (records / combination)
7167           */
7168           if ((found_part & 1) &&
7169               (!(table->file->index_flags(key, 0, 0) & HA_ONLY_WHOLE_INDEX) ||
7170                found_part == PREV_BITS(uint,keyinfo->user_defined_key_parts)))
7171           {
7172             max_key_part= max_part_bit(found_part);
7173             /*
7174               ReuseRangeEstimateForRef-3:
7175               We're now considering a ref[or_null] access via
7176               (t.keypart1=e1 AND ... AND t.keypartK=eK) [ OR
7177               (same-as-above but with one cond replaced
7178                with "t.keypart_i IS NULL")]  (**)
7179 
7180               Try re-using E(#rows) from "range" optimizer:
7181               We can do so if "range" optimizer used the same intervals as
7182               in (**). The intervals used by range optimizer may be not
              available at this point (as "range" access might have chosen to
7184               create quick select over another index), so we can't compare
7185               them to (**). We'll make indirect judgements instead.
7186               The sufficient conditions for re-use are:
7187               (C1) All e_i in (**) are constants, i.e. found_ref==FALSE. (if
7188                    this is not satisfied we have no way to know which ranges
7189                    will be actually scanned by 'ref' until we execute the
7190                    join)
7191               (C2) max #key parts in 'range' access == K == max_key_part (this
7192                    is apparently a necessary requirement)
7193 
7194               We also have a property that "range optimizer produces equal or
7195               tighter set of scan intervals than ref(const) optimizer". Each
7196               of the intervals in (**) are "tightest possible" intervals when
7197               one limits itself to using keyparts 1..K (which we do in #2).
7198               From here it follows that range access used either one, or
7199               both of the (I1) and (I2) intervals:
7200 
7201                (t.keypart1=c1 AND ... AND t.keypartK=eK)  (I1)
7202                (same-as-above but with one cond replaced
7203                 with "t.keypart_i IS NULL")               (I2)
7204 
7205               The remaining part is to exclude the situation where range
7206               optimizer used one interval while we're considering
7207               ref-or-null and looking for estimate for two intervals. This
7208               is done by last limitation:
7209 
7210               (C3) "range optimizer used (have ref_or_null?2:1) intervals"
7211             */
7212             if (table->quick_keys.is_set(key) && !found_ref &&          //(C1)
7213                 table->quick_key_parts[key] == max_key_part &&          //(C2)
7214                 table->quick_n_ranges[key] == 1 + MY_TEST(ref_or_null_part)) //(C3)
7215             {
7216               tmp= records= (double) table->quick_rows[key];
7217             }
7218             else
7219             {
7220               /* Check if we have statistic about the distribution */
7221               if ((records= keyinfo->actual_rec_per_key(max_key_part-1)))
7222               {
7223                 /*
7224                   Fix for the case where the index statistics is too
7225                   optimistic: If
7226                   (1) We're considering ref(const) and there is quick select
7227                       on the same index,
7228                   (2) and that quick select uses more keyparts (i.e. it will
7229                       scan equal/smaller interval then this ref(const))
7230                   (3) and E(#rows) for quick select is higher then our
7231                       estimate,
7232                   Then
7233                     We'll use E(#rows) from quick select.
7234 
7235                   Q: Why do we choose to use 'ref'? Won't quick select be
7236                   cheaper in some cases ?
7237                   TODO: figure this out and adjust the plan choice if needed.
7238                 */
7239                 if (!found_ref && table->quick_keys.is_set(key) &&    // (1)
7240                     table->quick_key_parts[key] > max_key_part &&     // (2)
7241                     records < (double)table->quick_rows[key])         // (3)
7242                   records= (double)table->quick_rows[key];
7243 
7244                 tmp= records;
7245               }
7246               else
7247               {
7248                 /*
7249                   Assume that the first key part matches 1% of the file
7250                   and that the whole key matches 10 (duplicates) or 1
7251                   (unique) records.
7252                   Assume also that more key matches proportionally more
7253                   records
7254                   This gives the formula:
7255                   records = (x * (b-a) + a*c-b)/(c-1)
7256 
7257                   b = records matched by whole key
7258                   a = records matched by first key part (1% of all records?)
7259                   c = number of key parts in key
7260                   x = used key parts (1 <= x <= c)
7261                 */
7262                 double rec_per_key;
7263                 if (!(rec_per_key=(double)
7264                       keyinfo->rec_per_key[keyinfo->user_defined_key_parts-1]))
7265                   rec_per_key=(double) s->records/rec+1;
7266 
7267                 if (!s->records)
7268                   tmp = 0;
7269                 else if (rec_per_key/(double) s->records >= 0.01)
7270                   tmp = rec_per_key;
7271                 else
7272                 {
7273                   double a=s->records*0.01;
7274                   if (keyinfo->user_defined_key_parts > 1)
7275                     tmp= (max_key_part * (rec_per_key - a) +
7276                           a*keyinfo->user_defined_key_parts - rec_per_key)/
7277                          (keyinfo->user_defined_key_parts-1);
7278                   else
7279                     tmp= a;
7280                   set_if_bigger(tmp,1.0);
7281                 }
7282                 records = (ulong) tmp;
7283               }
7284 
7285               if (ref_or_null_part)
7286               {
7287                 /* We need to do two key searches to find key */
7288                 tmp *= 2.0;
7289                 records *= 2.0;
7290               }
7291 
7292               /*
7293                 ReuseRangeEstimateForRef-4:  We get here if we could not reuse
7294                 E(#rows) from range optimizer. Make another try:
7295 
7296                 If range optimizer produced E(#rows) for a prefix of the ref
7297                 access we're considering, and that E(#rows) is lower then our
7298                 current estimate, make the adjustment.
7299 
7300                 The decision whether we can re-use the estimate from the range
7301                 optimizer is the same as in ReuseRangeEstimateForRef-3,
7302                 applied to first table->quick_key_parts[key] key parts.
7303               */
7304               if (table->quick_keys.is_set(key) &&
7305                   table->quick_key_parts[key] <= max_key_part &&
7306                   const_part &
7307                     ((key_part_map)1 << table->quick_key_parts[key]) &&
7308                   table->quick_n_ranges[key] == 1 + MY_TEST(ref_or_null_part &
7309                                                             const_part) &&
7310                   records > (double) table->quick_rows[key])
7311               {
7312                 tmp= records= (double) table->quick_rows[key];
7313               }
7314             }
7315 
7316             /* Limit the number of matched rows */
7317             set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
7318             if (table->covering_keys.is_set(key))
7319               tmp= table->file->keyread_time(key, 1, (ha_rows) tmp);
7320             else
7321               tmp= table->file->read_time(key, 1,
7322                                           (ha_rows) MY_MIN(tmp,s->worst_seeks));
7323             tmp= COST_MULT(tmp, record_count);
7324           }
7325           else
7326             tmp= best_time;                     // Do nothing
7327         }
7328 
7329         tmp= COST_ADD(tmp, s->startup_cost);
7330         loose_scan_opt.check_ref_access_part2(key, start_key, records, tmp,
7331                                               found_ref);
7332       } /* not ft_key */
7333 
7334       if (tmp + 0.0001 < best_time - records/(double) TIME_FOR_COMPARE)
7335       {
7336         best_time= COST_ADD(tmp, records/(double) TIME_FOR_COMPARE);
7337         best= tmp;
7338         best_records= records;
7339         best_key= start_key;
7340         best_max_key_part= max_key_part;
7341         best_ref_depends_map= found_ref;
7342       }
7343     } /* for each key */
7344     records= best_records;
7345   }
7346 
7347   /*
7348     If there is no key to access the table, but there is an equi-join
7349     predicate connecting the table with the privious tables then we
7350     consider the possibility of using hash join.
7351     We need also to check that:
7352     (1) s is inner table of semi-join -> join cache is allowed for semijoins
7353     (2) s is inner table of outer join -> join cache is allowed for outer joins
7354   */
7355   if (idx > join->const_tables && best_key == 0 &&
7356       (join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT) &&
7357       join->max_allowed_join_cache_level > 2 &&
7358      !bitmap_is_clear_all(eq_join_set) &&  !disable_jbuf &&
7359       (!s->emb_sj_nest ||
7360        join->allowed_semijoin_with_cache) &&    // (1)
7361       (!(s->table->map & join->outer_join) ||
7362        join->allowed_outer_join_with_cache))    // (2)
7363   {
7364     double join_sel= 0.1;
7365     /* Estimate the cost of  the hash join access to the table */
7366     double rnd_records= matching_candidates_in_table(s, found_constraint,
7367                                                      use_cond_selectivity);
7368 
7369     tmp= s->quick ? s->quick->read_time : s->scan_time();
7370     double cmp_time= (s->records - rnd_records)/(double) TIME_FOR_COMPARE;
7371     tmp= COST_ADD(tmp, cmp_time);
7372 
7373     /* We read the table as many times as join buffer becomes full. */
7374 
7375     double refills= (1.0 + floor((double) cache_record_length(join,idx) *
7376                            record_count /
7377 			   (double) thd->variables.join_buff_size));
7378     tmp= COST_MULT(tmp, refills);
7379     best_time= COST_ADD(tmp,
7380                         COST_MULT((record_count*join_sel) / TIME_FOR_COMPARE,
7381                                   rnd_records));
7382     best= tmp;
7383     records= rnd_records;
7384     best_key= hj_start_key;
7385     best_ref_depends_map= 0;
7386     best_uses_jbuf= TRUE;
7387    }
7388 
7389   /*
7390     Don't test table scan if it can't be better.
7391     Prefer key lookup if we would use the same key for scanning.
7392 
7393     Don't do a table scan on InnoDB tables, if we can read the used
7394     parts of the row from any of the used index.
7395     This is because table scans uses index and we would not win
7396     anything by using a table scan.
7397 
7398     A word for word translation of the below if-statement in sergefp's
7399     understanding: we check if we should use table scan if:
7400     (1) The found 'ref' access produces more records than a table scan
7401         (or index scan, or quick select), or 'ref' is more expensive than
7402         any of them.
7403     (2) This doesn't hold: the best way to perform table scan is to to perform
7404         'range' access using index IDX, and the best way to perform 'ref'
7405         access is to use the same index IDX, with the same or more key parts.
7406         (note: it is not clear how this rule is/should be extended to
7407         index_merge quick selects). Also if we have a hash join we prefer that
7408         over a table scan
7409     (3) See above note about InnoDB.
7410     (4) NOT ("FORCE INDEX(...)" is used for table and there is 'ref' access
7411              path, but there is no quick select)
7412         If the condition in the above brackets holds, then the only possible
7413         "table scan" access method is ALL/index (there is no quick select).
7414         Since we have a 'ref' access path, and FORCE INDEX instructs us to
7415         choose it over ALL/index, there is no need to consider a full table
7416         scan.
7417     (5) Non-flattenable semi-joins: don't consider doing a scan of temporary
7418         table if we had an option to make lookups into it. In real-world cases,
7419         lookups are cheaper than full scans, but when the table is small, they
7420         can be [considered to be] more expensive, which causes lookups not to
7421         be used for cases with small datasets, which is annoying.
7422   */
7423   if ((records >= s->found_records || best > s->read_time) &&            // (1)
7424       !(best_key && best_key->key == MAX_KEY) &&                         // (2)
7425       !(s->quick && best_key && s->quick->index == best_key->key &&      // (2)
7426         best_max_key_part >= s->table->quick_key_parts[best_key->key]) &&// (2)
7427       !((s->table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) &&   // (3)
7428         ! s->table->covering_keys.is_clear_all() && best_key && !s->quick) &&// (3)
7429       !(s->table->force_index && best_key && !s->quick) &&               // (4)
7430       !(best_key && s->table->pos_in_table_list->jtbm_subselect))        // (5)
7431   {                                             // Check full join
7432     double rnd_records= matching_candidates_in_table(s, found_constraint,
7433                                                       use_cond_selectivity);
7434 
7435     /*
7436       Range optimizer never proposes a RANGE if it isn't better
7437       than FULL: so if RANGE is present, it's always preferred to FULL.
7438       Here we estimate its cost.
7439     */
7440 
7441     if (s->quick)
7442     {
7443       /*
7444         For each record we:
7445         - read record range through 'quick'
7446         - skip rows which does not satisfy WHERE constraints
7447         TODO:
7448         We take into account possible use of join cache for ALL/index
7449         access (see first else-branch below), but we don't take it into
7450         account here for range/index_merge access. Find out why this is so.
7451       */
7452       double cmp_time= (s->found_records - rnd_records)/(double) TIME_FOR_COMPARE;
7453       tmp= COST_MULT(record_count,
7454                      COST_ADD(s->quick->read_time, cmp_time));
7455 
7456       loose_scan_opt.check_range_access(join, idx, s->quick);
7457     }
7458     else
7459     {
7460       /* Estimate cost of reading table. */
7461       if (s->table->force_index && !best_key) // index scan
7462         tmp= s->table->file->read_time(s->ref.key, 1, s->records);
7463       else // table scan
7464         tmp= s->scan_time();
7465 
7466       if ((s->table->map & join->outer_join) || disable_jbuf)     // Can't use join cache
7467       {
7468         /*
7469           For each record we have to:
7470           - read the whole table record
7471           - skip rows which does not satisfy join condition
7472         */
7473         double cmp_time= (s->records - rnd_records)/(double) TIME_FOR_COMPARE;
7474         tmp= COST_MULT(record_count, COST_ADD(tmp,cmp_time));
7475       }
7476       else
7477       {
7478         double refills= (1.0 + floor((double) cache_record_length(join,idx) *
7479                         (record_count /
7480                          (double) thd->variables.join_buff_size)));
7481         tmp= COST_MULT(tmp, refills);
7482         /*
7483             We don't make full cartesian product between rows in the scanned
7484            table and existing records because we skip all rows from the
7485            scanned table, which does not satisfy join condition when
7486            we read the table (see flush_cached_records for details). Here we
7487            take into account cost to read and skip these records.
7488         */
7489         double cmp_time= (s->records - rnd_records)/(double) TIME_FOR_COMPARE;
7490         tmp= COST_ADD(tmp, cmp_time);
7491       }
7492     }
7493 
7494     /* Splitting technique cannot be used with join cache */
7495     if (s->table->is_splittable())
7496       tmp+= s->table->get_materialization_cost();
7497     else
7498       tmp+= s->startup_cost;
7499     /*
7500       We estimate the cost of evaluating WHERE clause for found records
7501       as record_count * rnd_records / TIME_FOR_COMPARE. This cost plus
7502       tmp give us total cost of using TABLE SCAN
7503     */
7504     if (best == DBL_MAX ||
7505         COST_ADD(tmp, record_count/(double) TIME_FOR_COMPARE*rnd_records) <
7506          (best_key->is_for_hash_join() ? best_time :
7507           COST_ADD(best, record_count/(double) TIME_FOR_COMPARE*records)))
7508     {
7509       /*
7510         If the table has a range (s->quick is set) make_join_select()
7511         will ensure that this will be used
7512       */
7513       best= tmp;
7514       records= rnd_records;
7515       best_key= 0;
7516       /* range/index_merge/ALL/index access method are "independent", so: */
7517       best_ref_depends_map= 0;
7518       best_uses_jbuf= MY_TEST(!disable_jbuf && !((s->table->map &
7519                                                   join->outer_join)));
7520       spl_plan= 0;
7521     }
7522   }
7523 
7524   /* Update the cost information for the current partial plan */
7525   pos->records_read= records;
7526   pos->read_time=    best;
7527   pos->key=          best_key;
7528   pos->table=        s;
7529   pos->ref_depend_map= best_ref_depends_map;
7530   pos->loosescan_picker.loosescan_key= MAX_KEY;
7531   pos->use_join_buffer= best_uses_jbuf;
7532   pos->spl_plan= spl_plan;
7533 
7534   loose_scan_opt.save_to_position(s, loose_scan_pos);
7535 
7536   if (!best_key &&
7537       idx == join->const_tables &&
7538       s->table == join->sort_by_table &&
7539       join->unit->select_limit_cnt >= records)
7540     join->sort_by_table= (TABLE*) 1;  // Must use temporary table
7541 
7542   DBUG_VOID_RETURN;
7543 }
7544 
7545 
7546 /*
7547   Find JOIN_TAB's embedding (i.e, parent) subquery.
7548   - For merged semi-joins, tables inside the semi-join nest have their
7549     semi-join nest as parent.  We intentionally ignore results of table
7550     pullout action here.
7551   - For non-merged semi-joins (JTBM tabs), the embedding subquery is the
7552     JTBM join tab itself.
7553 */
7554 
get_emb_subq(JOIN_TAB * tab)7555 static TABLE_LIST* get_emb_subq(JOIN_TAB *tab)
7556 {
7557   TABLE_LIST *tlist= tab->table->pos_in_table_list;
7558   if (tlist->jtbm_subselect)
7559     return tlist;
7560   TABLE_LIST *embedding= tlist->embedding;
7561   if (!embedding || !embedding->sj_subq_pred)
7562     return NULL;
7563   return embedding;
7564 }
7565 
7566 
7567 /*
7568   Choose initial table order that "helps" semi-join optimizations.
7569 
7570   The idea is that we should start with the order that is the same as the one
7571   we would have had if we had semijoin=off:
7572   - Top-level tables go first
7573   - subquery tables are grouped together by the subquery they are in,
7574   - subquery tables are attached where the subquery predicate would have been
7575     attached if we had semi-join off.
7576 
7577   This function relies on join_tab_cmp()/join_tab_cmp_straight() to produce
7578   certain pre-liminary ordering, see compare_embedding_subqueries() for its
7579   description.
7580 */
7581 
choose_initial_table_order(JOIN * join)7582 static void choose_initial_table_order(JOIN *join)
7583 {
7584   TABLE_LIST *emb_subq;
7585   JOIN_TAB **tab= join->best_ref + join->const_tables;
7586   JOIN_TAB **tabs_end= tab + join->table_count - join->const_tables;
7587   DBUG_ENTER("choose_initial_table_order");
7588   /* Find where the top-level JOIN_TABs end and subquery JOIN_TABs start */
7589   for (; tab != tabs_end; tab++)
7590   {
7591     if ((emb_subq= get_emb_subq(*tab)))
7592       break;
7593   }
7594   uint n_subquery_tabs= (uint)(tabs_end - tab);
7595 
7596   if (!n_subquery_tabs)
7597     DBUG_VOID_RETURN;
7598 
7599   /* Copy the subquery JOIN_TABs to a separate array */
7600   JOIN_TAB *subquery_tabs[MAX_TABLES];
7601   memcpy(subquery_tabs, tab, sizeof(JOIN_TAB*) * n_subquery_tabs);
7602 
7603   JOIN_TAB **last_top_level_tab= tab;
7604   JOIN_TAB **subq_tab= subquery_tabs;
7605   JOIN_TAB **subq_tabs_end= subquery_tabs + n_subquery_tabs;
7606   TABLE_LIST *cur_subq_nest= NULL;
7607   for (; subq_tab < subq_tabs_end; subq_tab++)
7608   {
7609     if (get_emb_subq(*subq_tab)!= cur_subq_nest)
7610     {
7611       /*
7612         Reached the part of subquery_tabs that covers tables in some subquery.
7613       */
7614       cur_subq_nest= get_emb_subq(*subq_tab);
7615 
7616       /* Determine how many tables the subquery has */
7617       JOIN_TAB **last_tab_for_subq;
7618       for (last_tab_for_subq= subq_tab;
7619            last_tab_for_subq < subq_tabs_end &&
7620            get_emb_subq(*last_tab_for_subq) == cur_subq_nest;
7621            last_tab_for_subq++) {}
7622       uint n_subquery_tables= (uint)(last_tab_for_subq - subq_tab);
7623 
7624       /*
7625         Walk the original array and find where this subquery would have been
7626         attached to
7627       */
7628       table_map need_tables= cur_subq_nest->original_subq_pred_used_tables;
7629       need_tables &= ~(join->const_table_map | PSEUDO_TABLE_BITS);
7630       for (JOIN_TAB **top_level_tab= join->best_ref + join->const_tables;
7631            top_level_tab < last_top_level_tab;
7632            //top_level_tab < join->best_ref + join->table_count;
7633            top_level_tab++)
7634       {
7635         need_tables &= ~(*top_level_tab)->table->map;
7636         /* Check if this is the place where subquery should be attached */
7637         if (!need_tables)
7638         {
7639           /* Move away the top-level tables that are after top_level_tab */
7640           size_t top_tail_len= last_top_level_tab - top_level_tab - 1;
7641           memmove(top_level_tab + 1 + n_subquery_tables, top_level_tab + 1,
7642                   sizeof(JOIN_TAB*)*top_tail_len);
7643           last_top_level_tab += n_subquery_tables;
7644           memcpy(top_level_tab + 1, subq_tab, sizeof(JOIN_TAB*)*n_subquery_tables);
7645           break;
7646         }
7647       }
7648       DBUG_ASSERT(!need_tables);
7649       subq_tab += n_subquery_tables - 1;
7650     }
7651   }
7652   DBUG_VOID_RETURN;
7653 }
7654 
7655 
7656 /**
7657   Selects and invokes a search strategy for an optimal query plan.
7658 
7659   The function checks user-configurable parameters that control the search
7660   strategy for an optimal plan, selects the search method and then invokes
7661   it. Each specific optimization procedure stores the final optimal plan in
7662   the array 'join->best_positions', and the cost of the plan in
7663   'join->best_read'.
7664 
7665   @param join         pointer to the structure providing all context info for
7666                       the query
7667   @param join_tables  set of the tables in the query
7668 
7669   @retval
7670     FALSE       ok
7671   @retval
7672     TRUE        Fatal error
7673 */
7674 
7675 bool
choose_plan(JOIN * join,table_map join_tables)7676 choose_plan(JOIN *join, table_map join_tables)
7677 {
7678   uint search_depth= join->thd->variables.optimizer_search_depth;
7679   uint prune_level=  join->thd->variables.optimizer_prune_level;
7680   uint use_cond_selectivity=
7681          join->thd->variables.optimizer_use_condition_selectivity;
7682   bool straight_join= MY_TEST(join->select_options & SELECT_STRAIGHT_JOIN);
7683   DBUG_ENTER("choose_plan");
7684 
7685   join->cur_embedding_map= 0;
7686   reset_nj_counters(join, join->join_list);
7687   qsort2_cmp jtab_sort_func;
7688 
7689   if (join->emb_sjm_nest)
7690   {
7691     /* We're optimizing semi-join materialization nest, so put the
7692        tables from this semi-join as first
7693     */
7694     jtab_sort_func= join_tab_cmp_embedded_first;
7695   }
7696   else
7697   {
7698     /*
7699       if (SELECT_STRAIGHT_JOIN option is set)
7700         reorder tables so dependent tables come after tables they depend
7701         on, otherwise keep tables in the order they were specified in the query
7702       else
7703         Apply heuristic: pre-sort all access plans with respect to the number of
7704         records accessed.
7705     */
7706     jtab_sort_func= straight_join ? join_tab_cmp_straight : join_tab_cmp;
7707   }
7708 
7709   /*
7710     psergey-todo: if we're not optimizing an SJM nest,
7711      - sort that outer tables are first, and each sjm nest follows
7712      - then, put each [sjm_table1, ... sjm_tableN] sub-array right where
7713        WHERE clause pushdown would have put it.
7714   */
7715   my_qsort2(join->best_ref + join->const_tables,
7716             join->table_count - join->const_tables, sizeof(JOIN_TAB*),
7717             jtab_sort_func, (void*)join->emb_sjm_nest);
7718 
7719   if (!join->emb_sjm_nest)
7720   {
7721     choose_initial_table_order(join);
7722   }
7723   join->cur_sj_inner_tables= 0;
7724 
7725   if (straight_join)
7726   {
7727     optimize_straight_join(join, join_tables);
7728   }
7729   else
7730   {
7731     DBUG_ASSERT(search_depth <= MAX_TABLES + 1);
7732     if (search_depth == 0)
7733       /* Automatically determine a reasonable value for 'search_depth' */
7734       search_depth= determine_search_depth(join);
7735     if (greedy_search(join, join_tables, search_depth, prune_level,
7736                       use_cond_selectivity))
7737       DBUG_RETURN(TRUE);
7738   }
7739 
7740   /*
7741     Store the cost of this query into a user variable
7742     Don't update last_query_cost for statements that are not "flat joins" :
7743     i.e. they have subqueries, unions or call stored procedures.
7744     TODO: calculate a correct cost for a query with subqueries and UNIONs.
7745   */
7746   if (join->thd->lex->is_single_level_stmt())
7747     join->thd->status_var.last_query_cost= join->best_read;
7748   DBUG_RETURN(FALSE);
7749 }
7750 
7751 
7752 /*
7753   Compare two join tabs based on the subqueries they are from.
7754    - top-level join tabs go first
7755    - then subqueries are ordered by their select_id (we're using this
7756      criteria because we need a cross-platform, deterministic ordering)
7757 
7758   @return
7759      0   -  equal
7760      -1  -  jt1 < jt2
7761      1   -  jt1 > jt2
7762 */
7763 
compare_embedding_subqueries(JOIN_TAB * jt1,JOIN_TAB * jt2)7764 static int compare_embedding_subqueries(JOIN_TAB *jt1, JOIN_TAB *jt2)
7765 {
7766   /* Determine if the first table is originally from a subquery */
7767   TABLE_LIST *tbl1= jt1->table->pos_in_table_list;
7768   uint tbl1_select_no;
7769   if (tbl1->jtbm_subselect)
7770   {
7771     tbl1_select_no=
7772       tbl1->jtbm_subselect->unit->first_select()->select_number;
7773   }
7774   else if (tbl1->embedding && tbl1->embedding->sj_subq_pred)
7775   {
7776     tbl1_select_no=
7777       tbl1->embedding->sj_subq_pred->unit->first_select()->select_number;
7778   }
7779   else
7780     tbl1_select_no= 1; /* Top-level */
7781 
7782   /* Same for the second table */
7783   TABLE_LIST *tbl2= jt2->table->pos_in_table_list;
7784   uint tbl2_select_no;
7785   if (tbl2->jtbm_subselect)
7786   {
7787     tbl2_select_no=
7788       tbl2->jtbm_subselect->unit->first_select()->select_number;
7789   }
7790   else if (tbl2->embedding && tbl2->embedding->sj_subq_pred)
7791   {
7792     tbl2_select_no=
7793       tbl2->embedding->sj_subq_pred->unit->first_select()->select_number;
7794   }
7795   else
7796     tbl2_select_no= 1; /* Top-level */
7797 
7798   /*
7799     Put top-level tables in front. Tables from within subqueries must follow,
7800     grouped by their owner subquery. We don't care about the order that
7801     subquery groups are in, because choose_initial_table_order() will re-order
7802     the groups.
7803   */
7804   if (tbl1_select_no != tbl2_select_no)
7805     return tbl1_select_no > tbl2_select_no ? 1 : -1;
7806   return 0;
7807 }
7808 
7809 
7810 /**
7811   Compare two JOIN_TAB objects based on the number of accessed records.
7812 
7813   @param ptr1 pointer to first JOIN_TAB object
7814   @param ptr2 pointer to second JOIN_TAB object
7815 
7816   NOTES
7817     The order relation implemented by join_tab_cmp() is not transitive,
7818     i.e. it is possible to choose such a, b and c that (a < b) && (b < c)
7819     but (c < a). This implies that result of a sort using the relation
7820     implemented by join_tab_cmp() depends on the order in which
7821     elements are compared, i.e. the result is implementation-specific.
7822     Example:
7823       a: dependent = 0x0 table->map = 0x1 found_records = 3 ptr = 0x907e6b0
7824       b: dependent = 0x0 table->map = 0x2 found_records = 3 ptr = 0x907e838
7825       c: dependent = 0x6 table->map = 0x10 found_records = 2 ptr = 0x907ecd0
7826 
7827    As for subqueries, this function must produce order that can be fed to
7828    choose_initial_table_order().
7829 
7830   @retval
7831     1  if first is bigger
7832   @retval
7833     -1  if second is bigger
7834   @retval
7835     0  if equal
7836 */
7837 
7838 static int
join_tab_cmp(const void * dummy,const void * ptr1,const void * ptr2)7839 join_tab_cmp(const void *dummy, const void* ptr1, const void* ptr2)
7840 {
7841   JOIN_TAB *jt1= *(JOIN_TAB**) ptr1;
7842   JOIN_TAB *jt2= *(JOIN_TAB**) ptr2;
7843   int cmp;
7844 
7845   if ((cmp= compare_embedding_subqueries(jt1, jt2)) != 0)
7846     return cmp;
7847   /*
7848     After that,
7849     take care about ordering imposed by LEFT JOIN constraints,
7850     possible [eq]ref accesses, and numbers of matching records in the table.
7851   */
7852   if (jt1->dependent & jt2->table->map)
7853     return 1;
7854   if (jt2->dependent & jt1->table->map)
7855     return -1;
7856   if (jt1->found_records > jt2->found_records)
7857     return 1;
7858   if (jt1->found_records < jt2->found_records)
7859     return -1;
7860   return jt1 > jt2 ? 1 : (jt1 < jt2 ? -1 : 0);
7861 }
7862 
7863 
7864 /**
7865   Same as join_tab_cmp, but for use with SELECT_STRAIGHT_JOIN.
7866 */
7867 
7868 static int
join_tab_cmp_straight(const void * dummy,const void * ptr1,const void * ptr2)7869 join_tab_cmp_straight(const void *dummy, const void* ptr1, const void* ptr2)
7870 {
7871   JOIN_TAB *jt1= *(JOIN_TAB**) ptr1;
7872   JOIN_TAB *jt2= *(JOIN_TAB**) ptr2;
7873 
7874   /*
7875     We don't do subquery flattening if the parent or child select has
7876     STRAIGHT_JOIN modifier. It is complicated to implement and the semantics
7877     is hardly useful.
7878   */
7879   DBUG_ASSERT(!jt1->emb_sj_nest);
7880   DBUG_ASSERT(!jt2->emb_sj_nest);
7881 
7882   int cmp;
7883   if ((cmp= compare_embedding_subqueries(jt1, jt2)) != 0)
7884     return cmp;
7885 
7886   if (jt1->dependent & jt2->table->map)
7887     return 1;
7888   if (jt2->dependent & jt1->table->map)
7889     return -1;
7890   return jt1 > jt2 ? 1 : (jt1 < jt2 ? -1 : 0);
7891 }
7892 
7893 
7894 /*
7895   Same as join_tab_cmp but tables from within the given semi-join nest go
7896   first. Used when the optimizing semi-join materialization nests.
7897 */
7898 
7899 static int
join_tab_cmp_embedded_first(const void * emb,const void * ptr1,const void * ptr2)7900 join_tab_cmp_embedded_first(const void *emb,  const void* ptr1, const void* ptr2)
7901 {
7902   const TABLE_LIST *emb_nest= (TABLE_LIST*) emb;
7903   JOIN_TAB *jt1= *(JOIN_TAB**) ptr1;
7904   JOIN_TAB *jt2= *(JOIN_TAB**) ptr2;
7905 
7906   if (jt1->emb_sj_nest == emb_nest && jt2->emb_sj_nest != emb_nest)
7907     return -1;
7908   if (jt1->emb_sj_nest != emb_nest && jt2->emb_sj_nest == emb_nest)
7909     return 1;
7910 
7911   if (jt1->dependent & jt2->table->map)
7912     return 1;
7913   if (jt2->dependent & jt1->table->map)
7914     return -1;
7915 
7916   if (jt1->found_records > jt2->found_records)
7917     return 1;
7918   if (jt1->found_records < jt2->found_records)
7919     return -1;
7920 
7921   return jt1 > jt2 ? 1 : (jt1 < jt2 ? -1 : 0);
7922 }
7923 
7924 
7925 /**
7926   Heuristic procedure to automatically guess a reasonable degree of
7927   exhaustiveness for the greedy search procedure.
7928 
7929   The procedure estimates the optimization time and selects a search depth
7930   big enough to result in a near-optimal QEP, that doesn't take too long to
7931   find. If the number of tables in the query exceeds some constant, then
7932   search_depth is set to this constant.
7933 
7934   @param join   pointer to the structure providing all context info for
7935                 the query
7936 
7937   @note
7938     This is an extremely simplistic implementation that serves as a stub for a
7939     more advanced analysis of the join. Ideally the search depth should be
7940     determined by learning from previous query optimizations, because it will
7941     depend on the CPU power (and other factors).
7942 
7943   @todo
7944     this value should be determined dynamically, based on statistics:
7945     uint max_tables_for_exhaustive_opt= 7;
7946 
7947   @todo
7948     this value could be determined by some mapping of the form:
7949     depth : table_count -> [max_tables_for_exhaustive_opt..MAX_EXHAUSTIVE]
7950 
7951   @return
7952     A positive integer that specifies the search depth (and thus the
7953     exhaustiveness) of the depth-first search algorithm used by
7954     'greedy_search'.
7955 */
7956 
7957 static uint
determine_search_depth(JOIN * join)7958 determine_search_depth(JOIN *join)
7959 {
7960   uint table_count=  join->table_count - join->const_tables;
7961   uint search_depth;
7962   /* TODO: this value should be determined dynamically, based on statistics: */
7963   uint max_tables_for_exhaustive_opt= 7;
7964 
7965   if (table_count <= max_tables_for_exhaustive_opt)
7966     search_depth= table_count+1; // use exhaustive for small number of tables
7967   else
7968     /*
7969       TODO: this value could be determined by some mapping of the form:
7970       depth : table_count -> [max_tables_for_exhaustive_opt..MAX_EXHAUSTIVE]
7971     */
7972     search_depth= max_tables_for_exhaustive_opt; // use greedy search
7973 
7974   return search_depth;
7975 }
7976 
7977 
7978 /**
7979   Select the best ways to access the tables in a query without reordering them.
7980 
7981     Find the best access paths for each query table and compute their costs
7982     according to their order in the array 'join->best_ref' (thus without
7983     reordering the join tables). The function calls sequentially
7984     'best_access_path' for each table in the query to select the best table
7985     access method. The final optimal plan is stored in the array
7986     'join->best_positions', and the corresponding cost in 'join->best_read'.
7987 
7988   @param join          pointer to the structure providing all context info for
7989                        the query
7990   @param join_tables   set of the tables in the query
7991 
7992   @note
7993     This function can be applied to:
7994     - queries with STRAIGHT_JOIN
7995     - internally to compute the cost of an arbitrary QEP
7996   @par
7997     Thus 'optimize_straight_join' can be used at any stage of the query
7998     optimization process to finalize a QEP as it is.
7999 */
8000 
8001 static void
optimize_straight_join(JOIN * join,table_map join_tables)8002 optimize_straight_join(JOIN *join, table_map join_tables)
8003 {
8004   JOIN_TAB *s;
8005   uint idx= join->const_tables;
8006   bool disable_jbuf= join->thd->variables.join_cache_level == 0;
8007   double    record_count= 1.0;
8008   double    read_time=    0.0;
8009   uint use_cond_selectivity=
8010          join->thd->variables.optimizer_use_condition_selectivity;
8011   POSITION  loose_scan_pos;
8012 
8013   for (JOIN_TAB **pos= join->best_ref + idx ; (s= *pos) ; pos++)
8014   {
8015     /* Find the best access method from 's' to the current partial plan */
8016     best_access_path(join, s, join_tables, join->positions, idx,
8017                      disable_jbuf, record_count,
8018                      join->positions + idx, &loose_scan_pos);
8019 
8020     /* compute the cost of the new plan extended with 's' */
8021     record_count= COST_MULT(record_count, join->positions[idx].records_read);
8022     read_time= COST_ADD(read_time,
8023                         COST_ADD(join->positions[idx].read_time,
8024                                  record_count / (double) TIME_FOR_COMPARE));
8025     advance_sj_state(join, join_tables, idx, &record_count, &read_time,
8026                      &loose_scan_pos);
8027 
8028     join_tables&= ~(s->table->map);
8029     double pushdown_cond_selectivity= 1.0;
8030     if (use_cond_selectivity > 1)
8031       pushdown_cond_selectivity= table_cond_selectivity(join, idx, s,
8032                                                         join_tables);
8033     join->positions[idx].cond_selectivity= pushdown_cond_selectivity;
8034     ++idx;
8035   }
8036 
8037   if (join->sort_by_table &&
8038       join->sort_by_table != join->positions[join->const_tables].table->table)
8039     read_time+= record_count;  // We have to make a temp table
8040   memcpy((uchar*) join->best_positions, (uchar*) join->positions,
8041          sizeof(POSITION)*idx);
8042   join->join_record_count= record_count;
8043   join->best_read= read_time - 0.001;
8044 }
8045 
8046 
8047 /**
8048   Find a good, possibly optimal, query execution plan (QEP) by a greedy search.
8049 
8050     The search procedure uses a hybrid greedy/exhaustive search with controlled
8051     exhaustiveness. The search is performed in N = card(remaining_tables)
8052     steps. Each step evaluates how promising is each of the unoptimized tables,
8053     selects the most promising table, and extends the current partial QEP with
8054     that table.  Currently the most 'promising' table is the one with the least
8055     expensive extension.
8056 
8057     There are two extreme cases:
8058     -# When (card(remaining_tables) < search_depth), the estimate finds the
8059     best complete continuation of the partial QEP. This continuation can be
8060     used directly as a result of the search.
8061     -# When (search_depth == 1) the 'best_extension_by_limited_search'
8062     considers the extension of the current QEP with each of the remaining
8063     unoptimized tables.
8064 
8065     All other cases are in-between these two extremes. Thus the parameter
8066     'search_depth' controls the exhaustiveness of the search. The higher the
8067     value, the longer the optimization time and possibly the better the
8068     resulting plan. The lower the value, the fewer alternative plans are
8069     estimated, but the more likely to get a bad QEP.
8070 
8071     All intermediate and final results of the procedure are stored in 'join':
8072     - join->positions     : modified for every partial QEP that is explored
8073     - join->best_positions: modified for the current best complete QEP
8074     - join->best_read     : modified for the current best complete QEP
8075     - join->best_ref      : might be partially reordered
8076 
8077     The final optimal plan is stored in 'join->best_positions', and its
8078     corresponding cost in 'join->best_read'.
8079 
8080   @note
8081     The following pseudocode describes the algorithm of 'greedy_search':
8082 
8083     @code
8084     procedure greedy_search
8085     input: remaining_tables
8086     output: pplan;
8087     {
8088       pplan = <>;
8089       do {
8090         (t, a) = best_extension(pplan, remaining_tables);
8091         pplan = concat(pplan, (t, a));
8092         remaining_tables = remaining_tables - t;
8093       } while (remaining_tables != {})
8094       return pplan;
8095     }
8096 
8097   @endcode
8098     where 'best_extension' is a placeholder for a procedure that selects the
8099     most "promising" of all tables in 'remaining_tables'.
8100     Currently this estimate is performed by calling
8101     'best_extension_by_limited_search' to evaluate all extensions of the
8102     current QEP of size 'search_depth', thus the complexity of 'greedy_search'
8103     mainly depends on that of 'best_extension_by_limited_search'.
8104 
8105   @par
8106     If 'best_extension()' == 'best_extension_by_limited_search()', then the
8107     worst-case complexity of this algorithm is <=
8108     O(N*N^search_depth/search_depth). When search_depth >= N, then the
8109     complexity of greedy_search is O(N!).
8110 
8111   @par
8112     In the future, 'greedy_search' might be extended to support other
8113     implementations of 'best_extension', e.g. some simpler quadratic procedure.
8114 
8115   @param join             pointer to the structure providing all context info
8116                           for the query
8117   @param remaining_tables set of tables not included into the partial plan yet
8118   @param search_depth     controls the exhaustiveness of the search
8119   @param prune_level      the pruning heuristics that should be applied during
8120                           search
8121   @param use_cond_selectivity  specifies how the selectivity of the conditions
8122                           pushed to a table should be taken into account
8123 
8124   @retval
8125     FALSE       ok
8126   @retval
8127     TRUE        Fatal error
8128 */
8129 
8130 static bool
greedy_search(JOIN * join,table_map remaining_tables,uint search_depth,uint prune_level,uint use_cond_selectivity)8131 greedy_search(JOIN      *join,
8132               table_map remaining_tables,
8133               uint      search_depth,
8134               uint      prune_level,
8135               uint      use_cond_selectivity)
8136 {
8137   double    record_count= 1.0;
8138   double    read_time=    0.0;
8139   uint      idx= join->const_tables; // index into 'join->best_ref'
8140   uint      best_idx;
8141   uint      size_remain;    // cardinality of remaining_tables
8142   POSITION  best_pos;
8143   JOIN_TAB  *best_table; // the next plan node to be added to the curr QEP
8144   // ==join->tables or # tables in the sj-mat nest we're optimizing
8145   uint      n_tables __attribute__((unused));
8146   DBUG_ENTER("greedy_search");
8147 
8148   /* number of tables that remain to be optimized */
8149   n_tables= size_remain= my_count_bits(remaining_tables &
8150                                        (join->emb_sjm_nest?
8151                                          (join->emb_sjm_nest->sj_inner_tables &
8152                                           ~join->const_table_map)
8153                                          :
8154                                          ~(table_map)0));
8155 
8156   do {
8157     /* Find the extension of the current QEP with the lowest cost */
8158     join->best_read= DBL_MAX;
8159     if (best_extension_by_limited_search(join, remaining_tables, idx, record_count,
8160                                          read_time, search_depth, prune_level,
8161                                          use_cond_selectivity))
8162       DBUG_RETURN(TRUE);
8163     /*
8164       'best_read < DBL_MAX' means that optimizer managed to find
8165       some plan and updated 'best_positions' array accordingly.
8166     */
8167     DBUG_ASSERT(join->best_read < DBL_MAX);
8168 
8169     if (size_remain <= search_depth)
8170     {
8171       /*
8172         'join->best_positions' contains a complete optimal extension of the
8173         current partial QEP.
8174       */
8175       DBUG_EXECUTE("opt", print_plan(join, n_tables,
8176                                      record_count, read_time, read_time,
8177                                      "optimal"););
8178       DBUG_RETURN(FALSE);
8179     }
8180 
8181     /* select the first table in the optimal extension as most promising */
8182     best_pos= join->best_positions[idx];
8183     best_table= best_pos.table;
8184     /*
8185       Each subsequent loop of 'best_extension_by_limited_search' uses
8186       'join->positions' for cost estimates, therefore we have to update its
8187       value.
8188     */
8189     join->positions[idx]= best_pos;
8190 
8191     /*
8192       Update the interleaving state after extending the current partial plan
8193       with a new table.
8194       We are doing this here because best_extension_by_limited_search reverts
8195       the interleaving state to the one of the non-extended partial plan
8196       on exit.
8197     */
8198     bool is_interleave_error __attribute__((unused))=
8199       check_interleaving_with_nj (best_table);
8200     /* This has been already checked by best_extension_by_limited_search */
8201     DBUG_ASSERT(!is_interleave_error);
8202 
8203 
8204     /* find the position of 'best_table' in 'join->best_ref' */
8205     best_idx= idx;
8206     JOIN_TAB *pos= join->best_ref[best_idx];
8207     while (pos && best_table != pos)
8208       pos= join->best_ref[++best_idx];
8209     DBUG_ASSERT((pos != NULL)); // should always find 'best_table'
8210     /* move 'best_table' at the first free position in the array of joins */
8211     swap_variables(JOIN_TAB*, join->best_ref[idx], join->best_ref[best_idx]);
8212 
8213     /* compute the cost of the new plan extended with 'best_table' */
8214     record_count= COST_MULT(record_count, join->positions[idx].records_read);
8215     read_time= COST_ADD(read_time,
8216                          COST_ADD(join->positions[idx].read_time,
8217                                   record_count / (double) TIME_FOR_COMPARE));
8218 
8219     remaining_tables&= ~(best_table->table->map);
8220     --size_remain;
8221     ++idx;
8222 
8223     DBUG_EXECUTE("opt", print_plan(join, idx,
8224                                    record_count, read_time, read_time,
8225                                    "extended"););
8226   } while (TRUE);
8227 }
8228 
8229 
8230 /**
8231   Get cost of execution and fanout produced by selected tables in the join
8232   prefix (where prefix is defined as prefix in depth-first traversal)
8233 
8234   @param end_tab_idx               The number of last tab to be taken into
8235                                    account (in depth-first traversal prefix)
8236   @param filter_map                Bitmap of tables whose cost/fanout are to
8237                                    be taken into account.
8238   @param read_time_arg     [out]   store read time here
8239   @param record_count_arg  [out]   store record count here
8240 
8241   @note
8242 
8243   @returns
8244     read_time_arg and record_count_arg contain the computed cost and fanout
8245 */
8246 
get_partial_cost_and_fanout(int end_tab_idx,table_map filter_map,double * read_time_arg,double * record_count_arg)8247 void JOIN::get_partial_cost_and_fanout(int end_tab_idx,
8248                                        table_map filter_map,
8249                                        double *read_time_arg,
8250                                        double *record_count_arg)
8251 {
8252   double record_count= 1;
8253   double read_time= 0.0;
8254   double sj_inner_fanout= 1.0;
8255   JOIN_TAB *end_tab= NULL;
8256   JOIN_TAB *tab;
8257   int i;
8258   int last_sj_table= MAX_TABLES;
8259 
8260   /*
8261     Handle a special case where the join is degenerate, and produces no
8262     records
8263   */
8264   if (table_count == const_tables)
8265   {
8266     *read_time_arg= 0.0;
8267     /*
8268       We return 1, because
8269        - it is the pessimistic estimate (there might be grouping)
8270        - it's safer, as we're less likely to hit the edge cases in
8271          calculations.
8272     */
8273     *record_count_arg=1.0;
8274     return;
8275   }
8276 
8277   for (tab= first_depth_first_tab(this), i= const_tables;
8278        tab;
8279        tab= next_depth_first_tab(this, tab), i++)
8280   {
8281     end_tab= tab;
8282     if (i == end_tab_idx)
8283       break;
8284   }
8285 
8286   for (tab= first_depth_first_tab(this), i= const_tables;
8287        ;
8288        tab= next_depth_first_tab(this, tab), i++)
8289   {
8290     if (end_tab->bush_root_tab && end_tab->bush_root_tab == tab)
8291     {
8292       /*
8293         We've entered the SJM nest that contains the end_tab. The caller is
8294         - interested in fanout inside the nest (because that's how many times
8295           we'll invoke the attached WHERE conditions)
8296         - not interested in cost
8297       */
8298       record_count= 1.0;
8299       read_time= 0.0;
8300     }
8301 
8302     /*
8303       Ignore fanout (but not cost) from sj-inner tables, as long as
8304       the range that processes them finishes before the end_tab
8305     */
8306     if (tab->sj_strategy != SJ_OPT_NONE)
8307     {
8308       sj_inner_fanout= 1.0;
8309       last_sj_table= i + tab->n_sj_tables;
8310     }
8311 
8312     table_map cur_table_map;
8313     if (tab->table)
8314       cur_table_map= tab->table->map;
8315     else
8316     {
8317       /* This is a SJ-Materialization nest. Check all of its tables */
8318       TABLE *first_child= tab->bush_children->start->table;
8319       TABLE_LIST *sjm_nest= first_child->pos_in_table_list->embedding;
8320       cur_table_map= sjm_nest->nested_join->used_tables;
8321     }
8322     if (tab->records_read && (cur_table_map & filter_map))
8323     {
8324       record_count= COST_MULT(record_count, tab->records_read);
8325       read_time= COST_ADD(read_time,
8326                           COST_ADD(tab->read_time,
8327                                    record_count / (double) TIME_FOR_COMPARE));
8328       if (tab->emb_sj_nest)
8329         sj_inner_fanout= COST_MULT(sj_inner_fanout, tab->records_read);
8330 				     }
8331 
8332     if (i == last_sj_table)
8333     {
8334       record_count /= sj_inner_fanout;
8335       sj_inner_fanout= 1.0;
8336       last_sj_table= MAX_TABLES;
8337     }
8338 
8339     if (tab == end_tab)
8340       break;
8341   }
8342   *read_time_arg= read_time;// + record_count / TIME_FOR_COMPARE;
8343   *record_count_arg= record_count;
8344 }
8345 
8346 
8347 /*
8348   Get prefix cost and fanout. This function is different from
8349   get_partial_cost_and_fanout:
8350    - it operates on a JOIN that hasn't yet finished its optimization phase (in
8351      particular, fix_semijoin_strategies_for_picked_join_order() and
8352      get_best_combination() haven't been called)
8353    - it assumes the join prefix doesn't have any semi-join plans
8354 
8355   These assumptions are met by the caller of the function.
8356 */
8357 
get_prefix_cost_and_fanout(uint n_tables,double * read_time_arg,double * record_count_arg)8358 void JOIN::get_prefix_cost_and_fanout(uint n_tables,
8359                                       double *read_time_arg,
8360                                       double *record_count_arg)
8361 {
8362   double record_count= 1;
8363   double read_time= 0.0;
8364   for (uint i= const_tables; i < n_tables + const_tables ; i++)
8365   {
8366     if (best_positions[i].records_read)
8367     {
8368       record_count= COST_MULT(record_count, best_positions[i].records_read);
8369       read_time= COST_ADD(read_time, best_positions[i].read_time);
8370     }
8371     /* TODO: Take into account condition selectivities here */
8372   }
8373   *read_time_arg= read_time;// + record_count / TIME_FOR_COMPARE;
8374   *record_count_arg= record_count;
8375 }
8376 
8377 
8378 /**
8379   Estimate the number of rows that query execution will read.
8380 
8381   @todo This is a very pessimistic upper bound. Use join selectivity
8382   when available to produce a more realistic number.
8383 */
8384 
get_examined_rows()8385 double JOIN::get_examined_rows()
8386 {
8387   double examined_rows;
8388   double prev_fanout= 1;
8389   double records;
8390   JOIN_TAB *tab= first_breadth_first_tab();
8391   JOIN_TAB *prev_tab= tab;
8392 
8393   records= (double)tab->get_examined_rows();
8394 
8395   while ((tab= next_breadth_first_tab(first_breadth_first_tab(),
8396                                       top_join_tab_count, tab)))
8397   {
8398     prev_fanout= COST_MULT(prev_fanout, prev_tab->records_read);
8399     records=
8400       COST_ADD(records,
8401                COST_MULT((double) (tab->get_examined_rows()), prev_fanout));
8402     prev_tab= tab;
8403   }
8404   examined_rows= (double)
8405     (records > (double) HA_ROWS_MAX ? HA_ROWS_MAX : (ha_rows) records);
8406   return examined_rows;
8407 }
8408 
8409 
8410 /**
8411   @brief
8412   Get the selectivity of equalities between columns when joining a table
8413 
8414   @param join       The optimized join
8415   @param idx        The number of tables in the evaluated partial join
8416   @param s          The table to be joined for evaluation
8417   @param rem_tables The bitmap of tables to be joined later
8418   @param keyparts   The number of key parts used when joining s
8419   @param ref_keyuse_steps Array of references to keyuses employed to join s
8420 */
8421 
8422 static
table_multi_eq_cond_selectivity(JOIN * join,uint idx,JOIN_TAB * s,table_map rem_tables,uint keyparts,uint16 * ref_keyuse_steps)8423 double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
8424                                        table_map rem_tables, uint keyparts,
8425                                        uint16 *ref_keyuse_steps)
8426 {
8427   double sel= 1.0;
8428   COND_EQUAL *cond_equal= join->cond_equal;
8429 
8430   if (!cond_equal || !cond_equal->current_level.elements)
8431     return sel;
8432 
8433    if (!s->keyuse)
8434     return sel;
8435 
8436   Item_equal *item_equal;
8437   List_iterator_fast<Item_equal> it(cond_equal->current_level);
8438   TABLE *table= s->table;
8439   table_map table_bit= table->map;
8440   POSITION *pos= &join->positions[idx];
8441 
8442   while ((item_equal= it++))
8443   {
8444     /*
8445       Check whether we need to take into account the selectivity of
8446       multiple equality item_equal. If this is the case multiply
8447       the current value of sel by this selectivity
8448     */
8449     table_map used_tables= item_equal->used_tables();
8450     if (!(used_tables & table_bit))
8451       continue;
8452     if (item_equal->get_const())
8453       continue;
8454 
8455     bool adjust_sel= FALSE;
8456     Item_equal_fields_iterator fi(*item_equal);
8457     while((fi++) && !adjust_sel)
8458     {
8459       Field *fld= fi.get_curr_field();
8460       if (fld->table->map != table_bit)
8461         continue;
8462       if (pos->key == 0)
8463         adjust_sel= TRUE;
8464       else
8465       {
8466         uint i;
8467         KEYUSE *keyuse= pos->key;
8468         uint key= keyuse->key;
8469         for (i= 0; i < keyparts; i++)
8470 	{
8471           if (i > 0)
8472             keyuse+= ref_keyuse_steps[i-1];
8473           uint fldno;
8474           if (is_hash_join_key_no(key))
8475 	    fldno= keyuse->keypart;
8476           else
8477             fldno= table->key_info[key].key_part[i].fieldnr - 1;
8478           if (fld->field_index == fldno)
8479             break;
8480         }
8481         keyuse= pos->key;
8482 
8483         if (i == keyparts)
8484 	{
8485           /*
8486             Field fld is included in multiple equality item_equal
8487             and is not a part of the ref key.
8488             The selectivity of the multiple equality must be taken
8489             into account unless one of the ref arguments is
8490             equal to fld.
8491 	  */
8492           adjust_sel= TRUE;
8493           for (uint j= 0; j < keyparts && adjust_sel; j++)
8494 	  {
8495             if (j > 0)
8496               keyuse+= ref_keyuse_steps[j-1];
8497             Item *ref_item= keyuse->val;
8498 	    if (ref_item->real_item()->type() == Item::FIELD_ITEM)
8499 	    {
8500               Item_field *field_item= (Item_field *) (ref_item->real_item());
8501               if (item_equal->contains(field_item->field))
8502                 adjust_sel= FALSE;
8503 	    }
8504           }
8505         }
8506       }
8507     }
8508     if (adjust_sel)
8509     {
8510       /*
8511         If ref == 0 and there are no fields in the multiple equality
8512         item_equal that belong to the tables joined prior to s
8513         then the selectivity of multiple equality will be set to 1.0.
8514       */
8515       double eq_fld_sel= 1.0;
8516       fi.rewind();
8517       while ((fi++))
8518       {
8519         double curr_eq_fld_sel;
8520         Field *fld= fi.get_curr_field();
8521         if (!(fld->table->map & ~(table_bit | rem_tables)))
8522           continue;
8523         curr_eq_fld_sel= get_column_avg_frequency(fld) /
8524                          fld->table->stat_records();
8525         if (curr_eq_fld_sel < 1.0)
8526           set_if_bigger(eq_fld_sel, curr_eq_fld_sel);
8527       }
8528       sel*= eq_fld_sel;
8529     }
8530   }
8531   return sel;
8532 }
8533 
8534 
8535 /**
8536   @brief
8537     Get the selectivity of conditions when joining a table
8538 
8539   @param join       The optimized join
8540   @param s          The table to be joined for evaluation
8541   @param rem_tables The bitmap of tables to be joined later
8542 
8543   @detail
8544     Get selectivity of conditions that can be applied when joining this table
8545     with previous tables.
8546 
8547     For quick selects and full table scans, selectivity of COND(this_table)
8548     is accounted for in matching_candidates_in_table(). Here, we only count
8549     selectivity of COND(this_table, previous_tables).
8550 
8551     For other access methods, we need to calculate selectivity of the whole
8552     condition, "COND(this_table) AND COND(this_table, previous_tables)".
8553 
8554   @retval
8555     selectivity of the conditions imposed on the rows of s
8556 */
8557 
8558 static
table_cond_selectivity(JOIN * join,uint idx,JOIN_TAB * s,table_map rem_tables)8559 double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
8560                               table_map rem_tables)
8561 {
8562   uint16 ref_keyuse_steps_buf[MAX_REF_PARTS];
8563   uint   ref_keyuse_size= MAX_REF_PARTS;
8564   uint16 *ref_keyuse_steps= ref_keyuse_steps_buf;
8565   Field *field;
8566   TABLE *table= s->table;
8567   MY_BITMAP *read_set= table->read_set;
8568   double sel= s->table->cond_selectivity;
8569   POSITION *pos= &join->positions[idx];
8570   uint keyparts= 0;
8571   uint found_part_ref_or_null= 0;
8572 
8573   if (pos->key != 0)
8574   {
8575     /*
8576       A ref access or hash join is used for this table. ref access is created
8577       from
8578 
8579         tbl.keypart1=expr1 AND tbl.keypart2=expr2 AND ...
8580 
8581       and it will only return rows for which this condition is satisified.
8582       Suppose, certain expr{i} is a constant. Since ref access only returns
8583       rows that satisfy
8584 
8585          tbl.keypart{i}=const       (*)
8586 
8587       then selectivity of this equality should not be counted in return value
8588       of this function. This function uses the value of
8589 
8590          table->cond_selectivity=selectivity(COND(tbl)) (**)
8591 
8592       as a starting point. This value includes selectivity of equality (*). We
8593       should somehow discount it.
8594 
8595       Looking at calculate_cond_selectivity_for_table(), one can see that that
8596       the value is not necessarily a direct multiplicand in
8597       table->cond_selectivity
8598 
8599       There are three possible ways to discount
8600       1. There is a potential range access on t.keypart{i}=const.
8601          (an important special case: the used ref access has a const prefix for
8602           which a range estimate is available)
8603 
8604       2. The field has a histogram. field[x]->cond_selectivity has the data.
8605 
8606       3. Use index stats on this index:
8607          rec_per_key[key_part+1]/rec_per_key[key_part]
8608 
8609       (TODO: more details about the "t.key=othertable.col" case)
8610     */
8611     KEYUSE *keyuse= pos->key;
8612     KEYUSE *prev_ref_keyuse= keyuse;
8613     uint key= keyuse->key;
8614     bool used_range_selectivity= false;
8615 
8616     /*
8617       Check if we have a prefix of key=const that matches a quick select.
8618     */
8619     if (!is_hash_join_key_no(key) && table->quick_keys.is_set(key))
8620     {
8621       key_part_map quick_key_map= (key_part_map(1) << table->quick_key_parts[key]) - 1;
8622       if (table->quick_rows[key] &&
8623           !(quick_key_map & ~table->const_key_parts[key]))
8624       {
8625         /*
8626           Ok, there is an equality for each of the key parts used by the
8627           quick select. This means, quick select's estimate can be reused to
8628           discount the selectivity of a prefix of a ref access.
8629         */
8630         for (; quick_key_map & 1 ; quick_key_map>>= 1)
8631         {
8632           while (keyuse->table == table && keyuse->key == key &&
8633                  keyuse->keypart == keyparts)
8634           {
8635             keyuse++;
8636           }
8637           keyparts++;
8638         }
8639         /*
8640           Here we discount selectivity of the constant range CR. To calculate
8641           this selectivity we use elements from the quick_rows[] array.
8642           If we have indexes i1,...,ik with the same prefix compatible
8643           with CR any of the estimate quick_rows[i1], ... quick_rows[ik] could
8644           be used for this calculation but here we don't know which one was
8645           actually used. So sel could be greater than 1 and we have to cap it.
8646           However if sel becomes greater than 2 then with high probability
8647           something went wrong.
8648 	*/
8649         sel /= (double)table->quick_rows[key] / (double) table->stat_records();
8650         set_if_smaller(sel, 1.0);
8651         used_range_selectivity= true;
8652       }
8653     }
8654 
8655     /*
8656       Go through the "keypart{N}=..." equalities and find those that were
8657       already taken into account in table->cond_selectivity.
8658     */
8659     keyuse= pos->key;
8660     keyparts=0;
8661     while (keyuse->table == table && keyuse->key == key)
8662     {
8663       if (!(keyuse->used_tables & (rem_tables | table->map)))
8664       {
8665         if (are_tables_local(s, keyuse->val->used_tables()))
8666 	{
8667           if (is_hash_join_key_no(key))
8668 	  {
8669             if (keyparts == keyuse->keypart)
8670               keyparts++;
8671           }
8672           else
8673 	  {
8674             if (keyparts == keyuse->keypart &&
8675                 !((keyuse->val->used_tables()) & ~pos->ref_depend_map) &&
8676                 !(found_part_ref_or_null & keyuse->optimize))
8677 	    {
8678               /* Found a KEYUSE object that will be used by ref access */
8679               keyparts++;
8680               found_part_ref_or_null|= keyuse->optimize & ~KEY_OPTIMIZE_EQ;
8681             }
8682           }
8683 
8684           if (keyparts > keyuse->keypart)
8685 	  {
8686             /* Ok this is the keyuse that will be used for ref access */
8687             if (!used_range_selectivity && keyuse->val->const_item())
8688             {
8689               uint fldno;
8690               if (is_hash_join_key_no(key))
8691                 fldno= keyuse->keypart;
8692               else
8693                 fldno= table->key_info[key].key_part[keyparts-1].fieldnr - 1;
8694 
8695               if (table->field[fldno]->cond_selectivity > 0)
8696 	      {
8697                 sel /= table->field[fldno]->cond_selectivity;
8698                 set_if_smaller(sel, 1.0);
8699               }
8700               /*
8701                TODO: we could do better here:
8702                  1. cond_selectivity might be =1 (the default) because quick
8703                     select on some index prevented us from analyzing
8704                     histogram for this column.
8705                  2. we could get an estimate through this?
8706                      rec_per_key[key_part-1] / rec_per_key[key_part]
8707               */
8708             }
8709             if (keyparts > 1)
8710 	    {
8711               /*
8712                 Prepare to set ref_keyuse_steps[keyparts-2]: resize the array
8713                 if it is not large enough
8714               */
8715               if (keyparts - 2 >= ref_keyuse_size)
8716               {
8717                 uint new_size= MY_MAX(ref_keyuse_size*2, keyparts);
8718                 void *new_buf;
8719                 if (!(new_buf= my_malloc(sizeof(*ref_keyuse_steps)*new_size,
8720                                          MYF(0))))
8721                 {
8722                   sel= 1.0; // As if no selectivity was computed
8723                   goto exit;
8724                 }
8725                 memcpy(new_buf, ref_keyuse_steps,
8726                        sizeof(*ref_keyuse_steps)*ref_keyuse_size);
8727                 if (ref_keyuse_steps != ref_keyuse_steps_buf)
8728                   my_free(ref_keyuse_steps);
8729 
8730                 ref_keyuse_steps= (uint16*)new_buf;
8731                 ref_keyuse_size= new_size;
8732               }
8733 
8734               ref_keyuse_steps[keyparts-2]= (uint16)(keyuse - prev_ref_keyuse);
8735               prev_ref_keyuse= keyuse;
8736             }
8737           }
8738 	}
8739       }
8740       keyuse++;
8741     }
8742   }
8743   else
8744   {
8745     /*
8746       The table is accessed with full table scan, or quick select.
8747       Selectivity of COND(table) is already accounted for in
8748       matching_candidates_in_table().
8749     */
8750     sel= 1;
8751   }
8752 
8753   /*
8754     If the field f from the table is equal to a field from one the
8755     earlier joined tables then the selectivity of the range conditions
8756     over the field f must be discounted.
8757 
8758     We need to discount selectivity only if we're using ref-based
8759     access method (and have sel!=1).
8760     If we use ALL/range/index_merge, then sel==1, and no need to discount.
8761   */
8762   if (pos->key != NULL)
8763   {
8764     for (Field **f_ptr=table->field ; (field= *f_ptr) ; f_ptr++)
8765     {
8766       if (!bitmap_is_set(read_set, field->field_index) ||
8767           !field->next_equal_field)
8768         continue;
8769       for (Field *next_field= field->next_equal_field;
8770            next_field != field;
8771            next_field= next_field->next_equal_field)
8772       {
8773         if (!(next_field->table->map & rem_tables) && next_field->table != table)
8774         {
8775           if (field->cond_selectivity > 0)
8776 	  {
8777             sel/= field->cond_selectivity;
8778             set_if_smaller(sel, 1.0);
8779           }
8780           break;
8781         }
8782       }
8783     }
8784   }
8785 
8786   sel*= table_multi_eq_cond_selectivity(join, idx, s, rem_tables,
8787                                         keyparts, ref_keyuse_steps);
8788 exit:
8789   if (ref_keyuse_steps != ref_keyuse_steps_buf)
8790     my_free(ref_keyuse_steps);
8791   return sel;
8792 }
8793 
8794 
8795 /**
8796   Find a good, possibly optimal, query execution plan (QEP) by a possibly
8797   exhaustive search.
8798 
8799     The procedure searches for the optimal ordering of the query tables in set
8800     'remaining_tables' of size N, and the corresponding optimal access paths to
8801     each table. The choice of a table order and an access path for each table
8802     constitutes a query execution plan (QEP) that fully specifies how to
8803     execute the query.
8804 
8805     The maximal size of the found plan is controlled by the parameter
8806     'search_depth'. When search_depth == N, the resulting plan is complete and
8807     can be used directly as a QEP. If search_depth < N, the found plan consists
8808     of only some of the query tables. Such "partial" optimal plans are useful
8809     only as input to query optimization procedures, and cannot be used directly
8810     to execute a query.
8811 
8812     The algorithm begins with an empty partial plan stored in 'join->positions'
8813     and a set of N tables - 'remaining_tables'. Each step of the algorithm
8814     evaluates the cost of the partial plan extended by all access plans for
8815     each of the relations in 'remaining_tables', expands the current partial
8816     plan with the access plan that results in lowest cost of the expanded
8817     partial plan, and removes the corresponding relation from
8818     'remaining_tables'. The algorithm continues until it either constructs a
8819     complete optimal plan, or constructs an optimal partial plan with size =
8820     search_depth.
8821 
8822     The final optimal plan is stored in 'join->best_positions'. The
8823     corresponding cost of the optimal plan is in 'join->best_read'.
8824 
8825   @note
8826     The procedure uses a recursive depth-first search where the depth of the
8827     recursion (and thus the exhaustiveness of the search) is controlled by the
8828     parameter 'search_depth'.
8829 
8830   @note
8831     The pseudocode below describes the algorithm of
8832     'best_extension_by_limited_search'. The worst-case complexity of this
8833     algorithm is O(N*N^search_depth/search_depth). When search_depth >= N, then
8834     the complexity of greedy_search is O(N!).
8835 
8836     @code
8837     procedure best_extension_by_limited_search(
8838       pplan in,             // in, partial plan of tables-joined-so-far
8839       pplan_cost,           // in, cost of pplan
8840       remaining_tables,     // in, set of tables not referenced in pplan
8841       best_plan_so_far,     // in/out, best plan found so far
8842       best_plan_so_far_cost,// in/out, cost of best_plan_so_far
8843       search_depth)         // in, maximum size of the plans being considered
8844     {
8845       for each table T from remaining_tables
8846       {
8847         // Calculate the cost of using table T as above
8848         cost = complex-series-of-calculations;
8849 
8850         // Add the cost to the cost so far.
8851         pplan_cost+= cost;
8852 
8853         if (pplan_cost >= best_plan_so_far_cost)
8854           // pplan_cost already too great, stop search
8855           continue;
8856 
8857         pplan= expand pplan by best_access_method;
8858         remaining_tables= remaining_tables - table T;
8859         if (remaining_tables is not an empty set
8860             and
8861             search_depth > 1)
8862         {
8863           best_extension_by_limited_search(pplan, pplan_cost,
8864                                            remaining_tables,
8865                                            best_plan_so_far,
8866                                            best_plan_so_far_cost,
8867                                            search_depth - 1);
8868         }
8869         else
8870         {
8871           best_plan_so_far_cost= pplan_cost;
8872           best_plan_so_far= pplan;
8873         }
8874       }
8875     }
8876     @endcode
8877 
8878   @note
8879     When 'best_extension_by_limited_search' is called for the first time,
8880     'join->best_read' must be set to the largest possible value (e.g. DBL_MAX).
8881     The actual implementation provides a way to optionally use pruning
8882     heuristic (controlled by the parameter 'prune_level') to reduce the search
8883     space by skipping some partial plans.
8884 
8885   @note
8886     The parameter 'search_depth' provides control over the recursion
8887     depth, and thus the size of the resulting optimal plan.
8888 
8889   @param join             pointer to the structure providing all context info
8890                           for the query
8891   @param remaining_tables set of tables not included into the partial plan yet
8892   @param idx              length of the partial QEP in 'join->positions';
8893                           since a depth-first search is used, also corresponds
8894                           to the current depth of the search tree;
8895                           also an index in the array 'join->best_ref';
8896   @param record_count     estimate for the number of records returned by the
8897                           best partial plan
8898   @param read_time        the cost of the best partial plan
8899   @param search_depth     maximum depth of the recursion and thus size of the
8900                           found optimal plan
8901                           (0 < search_depth <= join->tables+1).
8902   @param prune_level      pruning heuristics that should be applied during
8903                           optimization
8904                           (values: 0 = EXHAUSTIVE, 1 = PRUNE_BY_TIME_OR_ROWS)
8905   @param use_cond_selectivity  specifies how the selectivity of the conditions
8906                           pushed to a table should be taken into account
8907 
8908   @retval
8909     FALSE       ok
8910   @retval
8911     TRUE        Fatal error
8912 */
8913 
static bool
best_extension_by_limited_search(JOIN      *join,
                                 table_map remaining_tables,
                                 uint      idx,
                                 double    record_count,
                                 double    read_time,
                                 uint      search_depth,
                                 uint      prune_level,
                                 uint      use_cond_selectivity)
{
  DBUG_ENTER("best_extension_by_limited_search");

  THD *thd= join->thd;

  /* Debug-only hook, active only when the named DBUG keyword is enabled */
  DBUG_EXECUTE_IF("show_explain_probe_best_ext_lim_search",
                  if (dbug_user_var_equals_int(thd,
                                               "show_explain_probe_select_id",
                                               join->select_lex->select_number))
                        dbug_serve_apcs(thd, 1);
                 );

  /* Checked on every recursion level so a killed query stops the search */
  if (unlikely(thd->check_killed()))  // Abort
    DBUG_RETURN(TRUE);

  /*
    NOTE(review): this call passes (read_time, record_count, idx) where every
    other print_plan() call in this function passes
    (record_count, read_time, <cost>) - confirm against print_plan()'s
    signature. It only affects debug trace output.
  */
  DBUG_EXECUTE("opt", print_plan(join, idx, read_time, record_count, idx,
                                 "SOFAR:"););

  /*
     'join' is a partial plan with lower cost than the best plan so far,
     so continue expanding it further with the tables in 'remaining_tables'.
  */
  JOIN_TAB *s;
  /*
    Best (lowest) row count and cost seen among the extensions tried at this
    level; used only by the prune_level == 1 heuristic below.
  */
  double best_record_count= DBL_MAX;
  double best_read_time=    DBL_MAX;
  /* Passed through to best_access_path() */
  bool disable_jbuf= join->thd->variables.join_cache_level == 0;

  DBUG_EXECUTE("opt", print_plan(join, idx, record_count, read_time, read_time,
                                "part_plan"););

  /*
    If we are searching for the execution plan of a materialized semi-join nest
    then allowed_tables contains bits only for the tables from this nest.
  */
  table_map allowed_tables= ~(table_map)0;
  if (join->emb_sjm_nest)
    allowed_tables= join->emb_sjm_nest->sj_inner_tables & ~join->const_table_map;

  /*
    Try every candidate in the NULL-terminated tail of join->best_ref that
    starts at position idx.
  */
  for (JOIN_TAB **pos= join->best_ref + idx ; (s= *pos) ; pos++)
  {
    table_map real_table_bit= s->table->map;
    /*
      A candidate is considered only if it is still in remaining_tables, is in
      allowed_tables, none of the tables in s->dependent are still remaining,
      and (for idx > 0) check_interleaving_with_nj() does not reject it.
    */
    if ((remaining_tables & real_table_bit) &&
        (allowed_tables & real_table_bit) &&
        !(remaining_tables & s->dependent) &&
        (!idx || !check_interleaving_with_nj(s)))
    {
      double current_record_count, current_read_time;
      POSITION *position= join->positions + idx;

      /* Find the best access method from 's' to the current partial plan */
      POSITION loose_scan_pos;
      best_access_path(join, s, remaining_tables, join->positions, idx,
                       disable_jbuf, record_count, position, &loose_scan_pos);

      /* Compute the cost of extending the plan with 's' */
      current_record_count= COST_MULT(record_count, position->records_read);
      current_read_time=COST_ADD(read_time,
                                 COST_ADD(position->read_time,
                                          current_record_count /
                                          (double) TIME_FOR_COMPARE));

      /*
        Receives pointers to the row count and cost just computed, so it may
        adjust them.
      */
      advance_sj_state(join, remaining_tables, idx, &current_record_count,
                       &current_read_time, &loose_scan_pos);

      /* Expand only partial plans with lower cost than the best QEP so far */
      if (current_read_time >= join->best_read)
      {
        DBUG_EXECUTE("opt", print_plan(join, idx+1,
                                       current_record_count,
                                       read_time,
                                       current_read_time,
                                       "prune_by_cost"););
        /*
          Every exit from this candidate calls the two restore functions
          (presumably to undo what check_interleaving_with_nj() and
          advance_sj_state() changed - confirm).
        */
        restore_prev_nj_state(s);
        restore_prev_sj_state(remaining_tables, s, idx);
        continue;
      }

      /*
        Prune some less promising partial plans. This heuristic may miss
        the optimal QEPs, thus it results in a non-exhaustive search.
      */
      if (prune_level == 1)
      {
        /*
          Keep the candidate if it beats the best row count or the best cost
          seen at this level, or if it is the first non-const table and is
          the sort_by_table. Otherwise skip it.
        */
        if (best_record_count > current_record_count ||
            best_read_time > current_read_time ||
            (idx == join->const_tables &&  // 's' is the first table in the QEP
            s->table == join->sort_by_table))
        {
          /*
            The "best so far" values are updated only when the candidate is
            at least as good on both measures (plus the condition below).
          */
          if (best_record_count >= current_record_count &&
              best_read_time >= current_read_time &&
              /* TODO: What is the reasoning behind this condition? */
              (!(s->key_dependent & allowed_tables & remaining_tables) ||
               join->positions[idx].records_read < 2.0))
          {
            best_record_count= current_record_count;
            best_read_time=    current_read_time;
          }
        }
        else
        {
          DBUG_EXECUTE("opt", print_plan(join, idx+1,
                                         current_record_count,
                                         read_time,
                                         current_read_time,
                                         "pruned_by_heuristic"););
          restore_prev_nj_state(s);
          restore_prev_sj_state(remaining_tables, s, idx);
          continue;
        }
      }

      /*
        Selectivity stays 1.0 unless use_cond_selectivity > 1. It is stored
        in the position and scales the row count passed to the next level.
      */
      double pushdown_cond_selectivity= 1.0;
      if (use_cond_selectivity > 1)
        pushdown_cond_selectivity= table_cond_selectivity(join, idx, s,
				                          remaining_tables &
                                                          ~real_table_bit);
      join->positions[idx].cond_selectivity= pushdown_cond_selectivity;
      double partial_join_cardinality= current_record_count *
                                        pushdown_cond_selectivity;
      /* Recurse only if depth remains and allowed tables are still unplaced */
      if ( (search_depth > 1) && (remaining_tables & ~real_table_bit) & allowed_tables )
      { /* Recursively expand the current partial plan */
        /*
          Move 's' to slot idx of best_ref for the duration of the recursive
          call, then swap it back. On a fatal error the function returns
          without swapping back or calling the restore functions.
        */
        swap_variables(JOIN_TAB*, join->best_ref[idx], *pos);
        if (best_extension_by_limited_search(join,
                                             remaining_tables & ~real_table_bit,
                                             idx + 1,
                                             partial_join_cardinality,
                                             current_read_time,
                                             search_depth - 1,
                                             prune_level,
                                             use_cond_selectivity))
          DBUG_RETURN(TRUE);
        swap_variables(JOIN_TAB*, join->best_ref[idx], *pos);
      }
      else
      { /*
          'join' is either the best partial QEP with 'search_depth' relations,
          or the best complete QEP so far, whichever is smaller.
        */
        if (join->sort_by_table &&
            join->sort_by_table !=
            join->positions[join->const_tables].table->table)
          /*
             We may have to make a temp table, note that this is only a
             heuristic since we cannot know for sure at this point.
             Hence it may be wrong.
          */
          current_read_time= COST_ADD(current_read_time, current_record_count);
        if (current_read_time < join->best_read)
        {
          /* Save the first idx+1 positions as the new best plan */
          memcpy((uchar*) join->best_positions, (uchar*) join->positions,
                 sizeof(POSITION) * (idx + 1));
          join->join_record_count= partial_join_cardinality;
          /*
            best_read is stored 0.001 below the plan's cost, so a later plan
            must be cheaper by more than 0.001 to replace this one.
          */
          join->best_read= current_read_time - 0.001;
        }
        DBUG_EXECUTE("opt", print_plan(join, idx+1,
                                       current_record_count,
                                       read_time,
                                       current_read_time,
                                       "full_plan"););
      }
      restore_prev_nj_state(s);
      restore_prev_sj_state(remaining_tables, s, idx);
    }
  }
  DBUG_RETURN(FALSE);
}
9089 
9090 
9091 /**
  Find how much space the previously read non-const tables take in cache.
9093 */
9094 
calc_used_field_length(bool max_fl)9095 void JOIN_TAB::calc_used_field_length(bool max_fl)
9096 {
9097   uint null_fields,blobs,fields;
9098   ulong rec_length;
9099   Field **f_ptr,*field;
9100   uint uneven_bit_fields;
9101   MY_BITMAP *read_set= table->read_set;
9102 
9103   uneven_bit_fields= null_fields= blobs= fields= rec_length=0;
9104   for (f_ptr=table->field ; (field= *f_ptr) ; f_ptr++)
9105   {
9106     if (bitmap_is_set(read_set, field->field_index))
9107     {
9108       uint flags=field->flags;
9109       fields++;
9110       rec_length+=field->pack_length();
9111       if (flags & BLOB_FLAG)
9112 	blobs++;
9113       if (!(flags & NOT_NULL_FLAG))
9114 	null_fields++;
9115       if (field->type() == MYSQL_TYPE_BIT &&
9116           ((Field_bit*)field)->bit_len)
9117         uneven_bit_fields++;
9118     }
9119   }
9120   if (null_fields || uneven_bit_fields)
9121     rec_length+=(table->s->null_fields+7)/8;
9122   if (table->maybe_null)
9123     rec_length+=sizeof(my_bool);
9124 
9125   /* Take into account that DuplicateElimination may need to store rowid */
9126   uint rowid_add_size= 0;
9127   if (keep_current_rowid)
9128   {
9129     rowid_add_size= table->file->ref_length;
9130     rec_length += rowid_add_size;
9131     fields++;
9132   }
9133 
9134   if (max_fl)
9135   {
9136     // TODO: to improve this estimate for max expected length
9137     if (blobs)
9138     {
9139       ulong blob_length= table->file->stats.mean_rec_length;
9140       if (ULONG_MAX - rec_length > blob_length)
9141         rec_length+=  blob_length;
9142       else
9143         rec_length= ULONG_MAX;
9144     }
9145     max_used_fieldlength= rec_length;
9146   }
9147   else if (table->file->stats.mean_rec_length)
9148     set_if_smaller(rec_length, table->file->stats.mean_rec_length + rowid_add_size);
9149 
9150   used_fields=fields;
9151   used_fieldlength=rec_length;
9152   used_blobs=blobs;
9153   used_null_fields= null_fields;
9154   used_uneven_bit_fields= uneven_bit_fields;
9155 }
9156 
9157 
9158 /*
9159   @brief
9160   Extract pushdown conditions for a table scan
9161 
9162   @details
  This function extracts pushdown conditions usable when this table is scanned.
9164   The conditions are extracted either from WHERE or from ON expressions.
9165   The conditions are attached to the field cache_select of this table.
9166 
9167   @note
  Currently the extracted conditions are used only by BNL and BNLH join
  algorithms.
9170 
9171   @retval  0   on success
9172            1   otherwise
9173 */
9174 
make_scan_filter()9175 int JOIN_TAB::make_scan_filter()
9176 {
9177   COND *tmp;
9178   DBUG_ENTER("make_scan_filter");
9179 
9180   Item *cond= is_inner_table_of_outer_join() ?
9181                 *get_first_inner_table()->on_expr_ref : join->conds;
9182 
9183   if (cond &&
9184       (tmp= make_cond_for_table(join->thd, cond,
9185                                join->const_table_map | table->map,
9186 			       table->map, -1, FALSE, TRUE)))
9187   {
9188      DBUG_EXECUTE("where",print_where(tmp,"cache", QT_ORDINARY););
9189      if (!(cache_select=
9190           (SQL_SELECT*) join->thd->memdup((uchar*) select, sizeof(SQL_SELECT))))
9191 	DBUG_RETURN(1);
9192      cache_select->cond= tmp;
9193      cache_select->read_tables=join->const_table_map;
9194   }
9195   DBUG_RETURN(0);
9196 }
9197 
9198 
9199 /**
9200   @brief
9201   Check whether hash join algorithm can be used to join this table
9202 
9203   @details
9204   This function finds out whether the ref items that have been chosen
9205   by the planner to access this table can be used for hash join algorithms.
  The answer depends on a certain property of the fields of the
9207   joined tables on which the hash join key is built.
9208 
9209   @note
9210   At present the function is supposed to be called only after the function
9211   get_best_combination has been called.
9212 
9213   @retval TRUE    it's possible to use hash join to join this table
9214   @retval FALSE   otherwise
9215 */
9216 
hash_join_is_possible()9217 bool JOIN_TAB::hash_join_is_possible()
9218 {
9219   if (type != JT_REF && type != JT_EQ_REF)
9220     return FALSE;
9221   if (!is_ref_for_hash_join())
9222   {
9223     KEY *keyinfo= table->key_info + ref.key;
9224     return keyinfo->key_part[0].field->hash_join_is_possible();
9225   }
9226   return TRUE;
9227 }
9228 
9229 
9230 /**
9231   @brief
  Check whether a KEYUSE can really be used to access this join table
9233 
9234   @param join    Join structure with the best join order
9235                  for which the check is performed
9236   @param keyuse  Evaluated KEYUSE structure
9237 
9238   @details
  This function is supposed to be used after the best execution plan has
  already been chosen and the JOIN_TAB array for the best join order has
  already been set.
9241   For a given KEYUSE to access this JOIN_TAB in the best execution plan the
9242   function checks whether it really can be used. The function first performs
9243   the check with access_from_tables_is_allowed(). If it succeeds it checks
9244   whether the keyuse->val does not use some fields of a materialized semijoin
9245   nest that cannot be used to build keys to access outer tables.
  Such KEYUSEs exist for a query like this:
9247     select * from ot
9248     where ot.c in (select it1.c from it1, it2 where it1.c=f(it2.c))
9249   Here we have two KEYUSEs to access table ot: with val=it1.c and val=f(it2.c).
9250   However if the subquery was materialized the second KEYUSE cannot be employed
9251   to access ot.
9252 
9253   @retval true  the given keyuse can be used for ref access of this JOIN_TAB
9254   @retval false otherwise
9255 */
9256 
keyuse_is_valid_for_access_in_chosen_plan(JOIN * join,KEYUSE * keyuse)9257 bool JOIN_TAB::keyuse_is_valid_for_access_in_chosen_plan(JOIN *join,
9258                                                          KEYUSE *keyuse)
9259 {
9260   if (!access_from_tables_is_allowed(keyuse->used_tables,
9261                                      join->sjm_lookup_tables))
9262     return false;
9263   if (join->sjm_scan_tables & table->map)
9264     return true;
9265   table_map keyuse_sjm_scan_tables= keyuse->used_tables &
9266                                     join->sjm_scan_tables;
9267   if (!keyuse_sjm_scan_tables)
9268     return true;
9269   uint sjm_tab_nr= 0;
9270   while (!(keyuse_sjm_scan_tables & table_map(1) << sjm_tab_nr))
9271     sjm_tab_nr++;
9272   JOIN_TAB *sjm_tab= join->map2table[sjm_tab_nr];
9273   TABLE_LIST *emb_sj_nest= sjm_tab->emb_sj_nest;
9274   if (!(emb_sj_nest->sj_mat_info && emb_sj_nest->sj_mat_info->is_used &&
9275         emb_sj_nest->sj_mat_info->is_sj_scan))
9276     return true;
9277   st_select_lex *sjm_sel= emb_sj_nest->sj_subq_pred->unit->first_select();
9278   for (uint i= 0; i < sjm_sel->item_list.elements; i++)
9279   {
9280     DBUG_ASSERT(sjm_sel->ref_pointer_array[i]->real_item()->type() == Item::FIELD_ITEM);
9281     if (keyuse->val->real_item()->type() == Item::FIELD_ITEM)
9282     {
9283       Field *field = ((Item_field*)sjm_sel->ref_pointer_array[i]->real_item())->field;
9284       if (field->eq(((Item_field*)keyuse->val->real_item())->field))
9285         return true;
9286     }
9287   }
9288   return false;
9289 }
9290 
9291 
9292 static uint
cache_record_length(JOIN * join,uint idx)9293 cache_record_length(JOIN *join,uint idx)
9294 {
9295   uint length=0;
9296   JOIN_TAB **pos,**end;
9297 
9298   for (pos=join->best_ref+join->const_tables,end=join->best_ref+idx ;
9299        pos != end ;
9300        pos++)
9301   {
9302     JOIN_TAB *join_tab= *pos;
9303     length+= join_tab->get_used_fieldlength();
9304   }
9305   return length;
9306 }
9307 
9308 
9309 /*
9310   Get the number of different row combinations for subset of partial join
9311 
9312   SYNOPSIS
9313     prev_record_reads()
9314       join       The join structure
9315       idx        Number of tables in the partial join order (i.e. the
9316                  partial join order is in join->positions[0..idx-1])
9317       found_ref  Bitmap of tables for which we need to find # of distinct
9318                  row combinations.
9319 
9320   DESCRIPTION
9321     Given a partial join order (in join->positions[0..idx-1]) and a subset of
9322     tables within that join order (specified in found_ref), find out how many
9323     distinct row combinations of subset tables will be in the result of the
9324     partial join order.
9325 
9326     This is used as follows: Suppose we have a table accessed with a ref-based
9327     method. The ref access depends on current rows of tables in found_ref.
9328     We want to count # of different ref accesses. We assume two ref accesses
9329     will be different if at least one of access parameters is different.
9330     Example: consider a query
9331 
9332     SELECT * FROM t1, t2, t3 WHERE t1.key=c1 AND t2.key=c2 AND t3.key=t1.field
9333 
9334     and a join order:
9335       t1,  ref access on t1.key=c1
9336       t2,  ref access on t2.key=c2
9337       t3,  ref access on t3.key=t1.field
9338 
9339     For t1: n_ref_scans = 1, n_distinct_ref_scans = 1
9340     For t2: n_ref_scans = records_read(t1), n_distinct_ref_scans=1
9341     For t3: n_ref_scans = records_read(t1)*records_read(t2)
9342             n_distinct_ref_scans = #records_read(t1)
9343 
9344     The reason for having this function (at least the latest version of it)
9345     is that we need to account for buffering in join execution.
9346 
9347     An edge-case example: if we have a non-first table in join accessed via
9348     ref(const) or ref(param) where there is a small number of different
9349     values of param, then the access will likely hit the disk cache and will
9350     not require any disk seeks.
9351 
9352     The proper solution would be to assume an LRU disk cache of some size,
9353     calculate probability of cache hits, etc. For now we just count
9354     identical ref accesses as one.
9355 
9356   RETURN
9357     Expected number of row combinations
9358 */
9359 
9360 double
prev_record_reads(const POSITION * positions,uint idx,table_map found_ref)9361 prev_record_reads(const POSITION *positions, uint idx, table_map found_ref)
9362 {
9363   double found=1.0;
9364   const POSITION *pos_end= positions - 1;
9365   for (const POSITION *pos= positions + idx - 1; pos != pos_end; pos--)
9366   {
9367     if (pos->table->table->map & found_ref)
9368     {
9369       found_ref|= pos->ref_depend_map;
9370       /*
9371         For the case of "t1 LEFT JOIN t2 ON ..." where t2 is a const table
9372         with no matching row we will get position[t2].records_read==0.
9373         Actually the size of output is one null-complemented row, therefore
9374         we will use value of 1 whenever we get records_read==0.
9375 
9376         Note
9377         - the above case can't occur if inner part of outer join has more
9378           than one table: table with no matches will not be marked as const.
9379 
9380         - Ideally we should add 1 to records_read for every possible null-
9381           complemented row. We're not doing it because: 1. it will require
9382           non-trivial code and add overhead. 2. The value of records_read
9383           is an inprecise estimate and adding 1 (or, in the worst case,
9384           #max_nested_outer_joins=64-1) will not make it any more precise.
9385       */
9386       if (pos->records_read)
9387       {
9388         found= COST_MULT(found, pos->records_read);
9389         found*= pos->cond_selectivity;
9390       }
9391      }
9392   }
9393   return found;
9394 }
9395 
9396 
9397 /*
9398   Enumerate join tabs in breadth-first fashion, including const tables.
9399 */
9400 
next_breadth_first_tab(JOIN_TAB * first_top_tab,uint n_top_tabs_count,JOIN_TAB * tab)9401 static JOIN_TAB *next_breadth_first_tab(JOIN_TAB *first_top_tab,
9402                                         uint n_top_tabs_count, JOIN_TAB *tab)
9403 {
9404   n_top_tabs_count += tab->join->aggr_tables;
9405   if (!tab->bush_root_tab)
9406   {
9407     /* We're at top level. Get the next top-level tab */
9408     tab++;
9409     if (tab < first_top_tab + n_top_tabs_count)
9410       return tab;
9411 
9412     /* No more top-level tabs. Switch to enumerating SJM nest children */
9413     tab= first_top_tab;
9414   }
9415   else
9416   {
9417     /* We're inside of an SJM nest */
9418     if (!tab->last_leaf_in_bush)
9419     {
9420       /* There's one more table in the nest, return it. */
9421       return ++tab;
9422     }
9423     else
9424     {
9425       /*
9426         There are no more tables in this nest. Get out of it and then we'll
9427         proceed to the next nest.
9428       */
9429       tab= tab->bush_root_tab + 1;
9430     }
9431   }
9432 
9433   /*
9434     Ok, "tab" points to a top-level table, and we need to find the next SJM
9435     nest and enter it.
9436   */
9437   for (; tab < first_top_tab + n_top_tabs_count; tab++)
9438   {
9439     if (tab->bush_children)
9440       return tab->bush_children->start;
9441   }
9442   return NULL;
9443 }
9444 
9445 
9446 /*
9447   Enumerate JOIN_TABs in "EXPLAIN order". This order
9448    - const tabs are included
9449    - we enumerate "optimization tabs".
9450    -
9451 */
9452 
first_explain_order_tab(JOIN * join)9453 JOIN_TAB *first_explain_order_tab(JOIN* join)
9454 {
9455   JOIN_TAB* tab;
9456   tab= join->join_tab;
9457   if (!tab)
9458     return NULL; /* Can happen when when the tables were optimized away */
9459   return (tab->bush_children) ? tab->bush_children->start : tab;
9460 }
9461 
9462 
next_explain_order_tab(JOIN * join,JOIN_TAB * tab)9463 JOIN_TAB *next_explain_order_tab(JOIN* join, JOIN_TAB* tab)
9464 {
9465   /* If we're inside SJM nest and have reached its end, get out */
9466   if (tab->last_leaf_in_bush)
9467     return tab->bush_root_tab;
9468 
9469   /* Move to next tab in the array we're traversing */
9470   tab++;
9471 
9472   if (tab == join->join_tab + join->top_join_tab_count)
9473     return NULL; /* Outside SJM nest and reached EOF */
9474 
9475   if (tab->bush_children)
9476     return tab->bush_children->start;
9477 
9478   return tab;
9479 }
9480 
9481 
9482 
first_top_level_tab(JOIN * join,enum enum_with_const_tables const_tbls)9483 JOIN_TAB *first_top_level_tab(JOIN *join, enum enum_with_const_tables const_tbls)
9484 {
9485   JOIN_TAB *tab= join->join_tab;
9486   if (const_tbls == WITHOUT_CONST_TABLES)
9487   {
9488     if (join->const_tables == join->table_count || !tab)
9489       return NULL;
9490     tab += join->const_tables;
9491   }
9492   return tab;
9493 }
9494 
9495 
next_top_level_tab(JOIN * join,JOIN_TAB * tab)9496 JOIN_TAB *next_top_level_tab(JOIN *join, JOIN_TAB *tab)
9497 {
9498   tab= next_breadth_first_tab(join->first_breadth_first_tab(),
9499                               join->top_join_tab_count, tab);
9500   if (tab && tab->bush_root_tab)
9501     tab= NULL;
9502   return tab;
9503 }
9504 
9505 
first_linear_tab(JOIN * join,enum enum_with_bush_roots include_bush_roots,enum enum_with_const_tables const_tbls)9506 JOIN_TAB *first_linear_tab(JOIN *join,
9507                            enum enum_with_bush_roots include_bush_roots,
9508                            enum enum_with_const_tables const_tbls)
9509 {
9510   JOIN_TAB *first= join->join_tab;
9511 
9512   if (!first)
9513     return NULL;
9514 
9515   if (const_tbls == WITHOUT_CONST_TABLES)
9516     first+= join->const_tables;
9517 
9518   if (first >= join->join_tab + join->top_join_tab_count)
9519     return NULL; /* All are const tables */
9520 
9521   if (first->bush_children && include_bush_roots == WITHOUT_BUSH_ROOTS)
9522   {
9523     /* This JOIN_TAB is a SJM nest; Start from first table in nest */
9524     return first->bush_children->start;
9525   }
9526 
9527   return first;
9528 }
9529 
9530 
9531 /*
9532   A helper function to loop over all join's join_tab in sequential fashion
9533 
9534   DESCRIPTION
9535     Depending on include_bush_roots parameter, JOIN_TABs that represent
9536     SJM-scan/lookups are either returned or omitted.
9537 
9538     SJM-Bush children are returned right after (or in place of) their container
9539     join tab (TODO: does anybody depend on this? A: make_join_readinfo() seems
9540     to)
9541 
9542     For example, if we have this structure:
9543 
9544        ot1--ot2--sjm1----------------ot3-...
9545                   |
9546                   +--it1--it2--it3
9547 
9548     calls to next_linear_tab( include_bush_roots=TRUE) will return:
9549 
9550       ot1 ot2 sjm1 it1 it2 it3 ot3 ...
9551 
9552    while calls to next_linear_tab( include_bush_roots=FALSE) will return:
9553 
9554       ot1 ot2 it1 it2 it3 ot3 ...
9555 
9556    (note that sjm1 won't be returned).
9557 */
9558 
next_linear_tab(JOIN * join,JOIN_TAB * tab,enum enum_with_bush_roots include_bush_roots)9559 JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab,
9560                           enum enum_with_bush_roots include_bush_roots)
9561 {
9562   if (include_bush_roots == WITH_BUSH_ROOTS && tab->bush_children)
9563   {
9564     /* This JOIN_TAB is a SJM nest; Start from first table in nest */
9565     return tab->bush_children->start;
9566   }
9567 
9568   DBUG_ASSERT(!tab->last_leaf_in_bush || tab->bush_root_tab);
9569 
9570   if (tab->bush_root_tab)       /* Are we inside an SJM nest */
9571   {
9572     /* Inside SJM nest */
9573     if (!tab->last_leaf_in_bush)
9574       return tab+1;              /* Return next in nest */
9575     /* Continue from the sjm on the top level */
9576     tab= tab->bush_root_tab;
9577   }
9578 
9579   /* If no more JOIN_TAB's on the top level */
9580   if (++tab >= join->join_tab + join->exec_join_tab_cnt() + join->aggr_tables)
9581     return NULL;
9582 
9583   if (include_bush_roots == WITHOUT_BUSH_ROOTS && tab->bush_children)
9584   {
9585     /* This JOIN_TAB is a SJM nest; Start from first table in nest */
9586     tab= tab->bush_children->start;
9587   }
9588   return tab;
9589 }
9590 
9591 
9592 /*
9593   Start to iterate over all join tables in bush-children-first order, excluding
9594   the const tables (see next_depth_first_tab() comment for details)
9595 */
9596 
first_depth_first_tab(JOIN * join)9597 JOIN_TAB *first_depth_first_tab(JOIN* join)
9598 {
9599   JOIN_TAB* tab;
9600   /* This means we're starting the enumeration */
9601   if (join->const_tables == join->top_join_tab_count || !join->join_tab)
9602     return NULL;
9603 
9604   tab= join->join_tab + join->const_tables;
9605 
9606   return (tab->bush_children) ? tab->bush_children->start : tab;
9607 }
9608 
9609 
9610 /*
9611   A helper function to iterate over all join tables in bush-children-first order
9612 
9613   DESCRIPTION
9614 
9615   For example, for this join plan
9616 
9617     ot1--ot2--sjm1------------ot3-...
9618                |
9619                |
9620               it1--it2--it3
9621 
9622   call to first_depth_first_tab() will return ot1, and subsequent calls to
9623   next_depth_first_tab() will return:
9624 
9625      ot2 it1 it2 it3 sjm ot3 ...
9626 */
9627 
next_depth_first_tab(JOIN * join,JOIN_TAB * tab)9628 JOIN_TAB *next_depth_first_tab(JOIN* join, JOIN_TAB* tab)
9629 {
9630   /* If we're inside SJM nest and have reached its end, get out */
9631   if (tab->last_leaf_in_bush)
9632     return tab->bush_root_tab;
9633 
9634   /* Move to next tab in the array we're traversing */
9635   tab++;
9636 
9637   if (tab == join->join_tab +join->top_join_tab_count)
9638     return NULL; /* Outside SJM nest and reached EOF */
9639 
9640   if (tab->bush_children)
9641     return tab->bush_children->start;
9642 
9643   return tab;
9644 }
9645 
9646 
check_two_phase_optimization(THD * thd)9647 bool JOIN::check_two_phase_optimization(THD *thd)
9648 {
9649   if (check_for_splittable_materialized())
9650     return true;
9651   return false;
9652 }
9653 
9654 
inject_cond_into_where(Item * injected_cond)9655 bool JOIN::inject_cond_into_where(Item *injected_cond)
9656 {
9657   Item *where_item= injected_cond;
9658   List<Item> *and_args= NULL;
9659   if (conds && conds->type() == Item::COND_ITEM &&
9660       ((Item_cond*) conds)->functype() == Item_func::COND_AND_FUNC)
9661   {
9662     and_args= ((Item_cond*) conds)->argument_list();
9663     if (cond_equal)
9664       and_args->disjoin((List<Item> *) &cond_equal->current_level);
9665   }
9666 
9667   where_item= and_items(thd, conds, where_item);
9668   if (where_item->fix_fields_if_needed(thd, 0))
9669     return true;
9670   thd->change_item_tree(&select_lex->where, where_item);
9671   select_lex->where->top_level_item();
9672   conds= select_lex->where;
9673 
9674   if (and_args && cond_equal)
9675   {
9676     and_args= ((Item_cond*) conds)->argument_list();
9677     List_iterator<Item_equal> li(cond_equal->current_level);
9678     Item_equal *elem;
9679     while ((elem= li++))
9680     {
9681       and_args->push_back(elem, thd->mem_root);
9682     }
9683   }
9684 
9685   return false;
9686 
9687 }
9688 
9689 
/*
  A constant NULL Item pointer. JOIN::get_best_combination() makes
  on_expr_ref of the JOIN_TAB it creates for an SJ-Materialization nest
  point at this variable (casting away the const).
*/
static Item * const null_ptr= NULL;
9691 
9692 /*
9693   Set up join struct according to the picked join order in
9694 
9695   SYNOPSIS
9696     get_best_combination()
9697       join  The join to process (the picked join order is mainly in
9698             join->best_positions)
9699 
9700   DESCRIPTION
9701     Setup join structures according the picked join order
9702     - finalize semi-join strategy choices (see
9703         fix_semijoin_strategies_for_picked_join_order)
9704     - create join->join_tab array and put there the JOIN_TABs in the join order
9705     - create data structures describing ref access methods.
9706 
9707   NOTE
9708     In this function we switch from pre-join-optimization JOIN_TABs to
9709     post-join-optimization JOIN_TABs. This is achieved by copying the entire
9710     JOIN_TAB objects.
9711 
9712   RETURN
9713     FALSE  OK
9714     TRUE   Out of memory
9715 */
9716 
get_best_combination()9717 bool JOIN::get_best_combination()
9718 {
9719   uint tablenr;
9720   table_map used_tables;
9721   JOIN_TAB *j;
9722   KEYUSE *keyuse;
9723   DBUG_ENTER("get_best_combination");
9724 
9725    /*
9726     Additional plan nodes for postjoin tmp tables:
9727       1? + // For GROUP BY
9728       1? + // For DISTINCT
9729       1? + // For aggregation functions aggregated in outer query
9730            // when used with distinct
9731       1? + // For ORDER BY
9732       1?   // buffer result
9733     Up to 2 tmp tables are actually used, but it's hard to tell exact number
9734     at this stage.
9735   */
9736   uint aggr_tables= (group_list ? 1 : 0) +
9737                     (select_distinct ?
9738                      (tmp_table_param.using_outer_summary_function ? 2 : 1) : 0) +
9739                     (order ? 1 : 0) +
9740        (select_options & (SELECT_BIG_RESULT | OPTION_BUFFER_RESULT) ? 1 : 0) ;
9741 
9742   if (aggr_tables == 0)
9743     aggr_tables= 1; /* For group by pushdown */
9744 
9745   if (select_lex->window_specs.elements)
9746     aggr_tables++;
9747 
9748   if (aggr_tables > 2)
9749     aggr_tables= 2;
9750   if (!(join_tab= (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)*
9751                                         (top_join_tab_count + aggr_tables))))
9752     DBUG_RETURN(TRUE);
9753 
9754   full_join=0;
9755   hash_join= FALSE;
9756 
9757   fix_semijoin_strategies_for_picked_join_order(this);
9758 
9759   if (inject_splitting_cond_for_all_tables_with_split_opt())
9760     DBUG_RETURN(TRUE);
9761 
9762   JOIN_TAB_RANGE *root_range;
9763   if (!(root_range= new (thd->mem_root) JOIN_TAB_RANGE))
9764     DBUG_RETURN(TRUE);
9765    root_range->start= join_tab;
9766   /* root_range->end will be set later */
9767   join_tab_ranges.empty();
9768 
9769   if (join_tab_ranges.push_back(root_range, thd->mem_root))
9770     DBUG_RETURN(TRUE);
9771 
9772   JOIN_TAB *sjm_nest_end= NULL;
9773   JOIN_TAB *sjm_nest_root= NULL;
9774 
9775   for (j=join_tab, tablenr=0 ; tablenr < table_count ; tablenr++,j++)
9776   {
9777     TABLE *form;
9778     POSITION *cur_pos= &best_positions[tablenr];
9779     if (cur_pos->sj_strategy == SJ_OPT_MATERIALIZE ||
9780         cur_pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN)
9781     {
9782       /*
9783         Ok, we've entered an SJ-Materialization semi-join (note that this can't
9784         be done recursively, semi-joins are not allowed to be nested).
9785         1. Put into main join order a JOIN_TAB that represents a lookup or scan
9786            in the temptable.
9787       */
9788       bzero((void*)j, sizeof(JOIN_TAB));
9789       j->join= this;
9790       j->table= NULL; //temporary way to tell SJM tables from others.
9791       j->ref.key = -1;
9792       j->on_expr_ref= (Item**) &null_ptr;
9793       j->keys= key_map(1); /* The unique index is always in 'possible keys' in EXPLAIN */
9794 
9795       /*
9796         2. Proceed with processing SJM nest's join tabs, putting them into the
9797            sub-order
9798       */
9799       SJ_MATERIALIZATION_INFO *sjm= cur_pos->table->emb_sj_nest->sj_mat_info;
9800       j->records_read= (sjm->is_sj_scan? sjm->rows : 1);
9801       j->records= (ha_rows) j->records_read;
9802       j->cond_selectivity= 1.0;
9803       JOIN_TAB *jt;
9804       JOIN_TAB_RANGE *jt_range;
9805       if (!(jt= (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)*sjm->tables)) ||
9806           !(jt_range= new JOIN_TAB_RANGE))
9807         DBUG_RETURN(TRUE);
9808       jt_range->start= jt;
9809       jt_range->end= jt + sjm->tables;
9810       join_tab_ranges.push_back(jt_range, thd->mem_root);
9811       j->bush_children= jt_range;
9812       sjm_nest_end= jt + sjm->tables;
9813       sjm_nest_root= j;
9814 
9815       j= jt;
9816     }
9817 
9818     *j= *best_positions[tablenr].table;
9819 
9820     j->bush_root_tab= sjm_nest_root;
9821 
9822     form= table[tablenr]= j->table;
9823     form->reginfo.join_tab=j;
9824     DBUG_PRINT("info",("type: %d", j->type));
9825     if (j->type == JT_CONST)
9826       goto loop_end;					// Handled in make_join_stat..
9827 
9828     j->loosescan_match_tab= NULL;  //non-nulls will be set later
9829     j->inside_loosescan_range= FALSE;
9830     j->ref.key = -1;
9831     j->ref.key_parts=0;
9832 
9833     if (j->type == JT_SYSTEM)
9834       goto loop_end;
9835     if ( !(keyuse= best_positions[tablenr].key))
9836     {
9837       j->type=JT_ALL;
9838       if (best_positions[tablenr].use_join_buffer &&
9839           tablenr != const_tables)
9840 	full_join= 1;
9841     }
9842 
9843     /*if (best_positions[tablenr].sj_strategy == SJ_OPT_LOOSE_SCAN)
9844     {
9845       DBUG_ASSERT(!keyuse || keyuse->key ==
9846                              best_positions[tablenr].loosescan_picker.loosescan_key);
9847       j->index= best_positions[tablenr].loosescan_picker.loosescan_key;
9848     }*/
9849 
9850     if ((j->type == JT_REF || j->type == JT_EQ_REF) &&
9851         is_hash_join_key_no(j->ref.key))
9852       hash_join= TRUE;
9853 
9854   loop_end:
9855     /*
9856       Save records_read in JOIN_TAB so that select_describe()/etc don't have
9857       to access join->best_positions[].
9858     */
9859     j->records_read= best_positions[tablenr].records_read;
9860     j->cond_selectivity= best_positions[tablenr].cond_selectivity;
9861     map2table[j->table->tablenr]= j;
9862 
9863     /* If we've reached the end of sjm nest, switch back to main sequence */
9864     if (j + 1 == sjm_nest_end)
9865     {
9866       j->last_leaf_in_bush= TRUE;
9867       j= sjm_nest_root;
9868       sjm_nest_root= NULL;
9869       sjm_nest_end= NULL;
9870     }
9871   }
9872   root_range->end= j;
9873 
9874   used_tables= OUTER_REF_TABLE_BIT;		// Outer row is already read
9875   for (j=join_tab, tablenr=0 ; tablenr < table_count ; tablenr++,j++)
9876   {
9877     if (j->bush_children)
9878       j= j->bush_children->start;
9879 
9880     used_tables|= j->table->map;
9881     if (j->type != JT_CONST && j->type != JT_SYSTEM)
9882     {
9883       if ((keyuse= best_positions[tablenr].key) &&
9884           create_ref_for_key(this, j, keyuse, TRUE, used_tables))
9885         DBUG_RETURN(TRUE);              // Something went wrong
9886     }
9887     if (j->last_leaf_in_bush)
9888       j= j->bush_root_tab;
9889   }
9890 
9891   top_join_tab_count= (uint)(join_tab_ranges.head()->end -
9892                       join_tab_ranges.head()->start);
9893 
9894   update_depend_map(this);
9895   DBUG_RETURN(0);
9896 }
9897 
9898 /**
9899   Create a descriptor of hash join key to access a given join table
9900 
9901   @param   join         join which the join table belongs to
9902   @param   join_tab     the join table to access
9903   @param   org_keyuse   beginning of the key uses to join this table
9904   @param   used_tables  bitmap of the previous tables
9905 
9906   @details
9907   This function first finds key uses that can be utilized by the hash join
9908   algorithm to join join_tab to the previous tables marked in the bitmap
9909   used_tables.  The tested key uses are taken from the array of all key uses
9910   for 'join' starting from the position org_keyuse. After all interesting key
9911   uses have been found the function builds a descriptor of the corresponding
9912   key that is used by the hash join algorithm would it be chosen to join
9913   the table join_tab.
9914 
9915   @retval  FALSE  the descriptor for a hash join key is successfully created
9916   @retval  TRUE   otherwise
9917 */
9918 
create_hj_key_for_table(JOIN * join,JOIN_TAB * join_tab,KEYUSE * org_keyuse,table_map used_tables)9919 static bool create_hj_key_for_table(JOIN *join, JOIN_TAB *join_tab,
9920                                     KEYUSE *org_keyuse, table_map used_tables)
9921 {
9922   KEY *keyinfo;
9923   KEY_PART_INFO *key_part_info;
9924   KEYUSE *keyuse= org_keyuse;
9925   uint key_parts= 0;
9926   THD  *thd= join->thd;
9927   TABLE *table= join_tab->table;
9928   bool first_keyuse= TRUE;
9929   DBUG_ENTER("create_hj_key_for_table");
9930 
9931   do
9932   {
9933     if (!(~used_tables & keyuse->used_tables) &&
9934         join_tab->keyuse_is_valid_for_access_in_chosen_plan(join, keyuse) &&
9935         are_tables_local(join_tab, keyuse->used_tables))
9936     {
9937       if (first_keyuse)
9938       {
9939         key_parts++;
9940       }
9941       else
9942       {
9943         KEYUSE *curr= org_keyuse;
9944         for( ; curr < keyuse; curr++)
9945         {
9946           if (curr->keypart == keyuse->keypart &&
9947               !(~used_tables & curr->used_tables) &&
9948               join_tab->keyuse_is_valid_for_access_in_chosen_plan(join,
9949                                                                   curr) &&
9950               are_tables_local(join_tab, curr->used_tables))
9951             break;
9952         }
9953         if (curr == keyuse)
9954            key_parts++;
9955       }
9956     }
9957     first_keyuse= FALSE;
9958     keyuse++;
9959   } while (keyuse->table == table && keyuse->is_for_hash_join());
9960   if (!key_parts)
9961     DBUG_RETURN(TRUE);
9962   /* This memory is allocated only once for the joined table join_tab */
9963   if (!(keyinfo= (KEY *) thd->alloc(sizeof(KEY))) ||
9964       !(key_part_info = (KEY_PART_INFO *) thd->alloc(sizeof(KEY_PART_INFO)*
9965                                                      key_parts)))
9966     DBUG_RETURN(TRUE);
9967   keyinfo->usable_key_parts= keyinfo->user_defined_key_parts = key_parts;
9968   keyinfo->ext_key_parts= keyinfo->user_defined_key_parts;
9969   keyinfo->key_part= key_part_info;
9970   keyinfo->key_length=0;
9971   keyinfo->algorithm= HA_KEY_ALG_UNDEF;
9972   keyinfo->flags= HA_GENERATED_KEY;
9973   keyinfo->is_statistics_from_stat_tables= FALSE;
9974   keyinfo->name.str= "$hj";
9975   keyinfo->name.length= 3;
9976   keyinfo->rec_per_key= (ulong*) thd->calloc(sizeof(ulong)*key_parts);
9977   if (!keyinfo->rec_per_key)
9978     DBUG_RETURN(TRUE);
9979   keyinfo->key_part= key_part_info;
9980 
9981   first_keyuse= TRUE;
9982   keyuse= org_keyuse;
9983   do
9984   {
9985     if (!(~used_tables & keyuse->used_tables) &&
9986         join_tab->keyuse_is_valid_for_access_in_chosen_plan(join, keyuse) &&
9987         are_tables_local(join_tab, keyuse->used_tables))
9988     {
9989       bool add_key_part= TRUE;
9990       if (!first_keyuse)
9991       {
9992         for(KEYUSE *curr= org_keyuse; curr < keyuse; curr++)
9993         {
9994           if (curr->keypart == keyuse->keypart &&
9995               !(~used_tables & curr->used_tables) &&
9996               join_tab->keyuse_is_valid_for_access_in_chosen_plan(join,
9997                                                                   curr) &&
9998               are_tables_local(join_tab, curr->used_tables))
9999 	  {
10000             keyuse->keypart= NO_KEYPART;
10001             add_key_part= FALSE;
10002             break;
10003           }
10004         }
10005       }
10006       if (add_key_part)
10007       {
10008         Field *field= table->field[keyuse->keypart];
10009         uint fieldnr= keyuse->keypart+1;
10010         table->create_key_part_by_field(key_part_info, field, fieldnr);
10011         keyinfo->key_length += key_part_info->store_length;
10012         key_part_info++;
10013       }
10014     }
10015     first_keyuse= FALSE;
10016     keyuse++;
10017   } while (keyuse->table == table && keyuse->is_for_hash_join());
10018 
10019   keyinfo->ext_key_parts= keyinfo->user_defined_key_parts;
10020   keyinfo->ext_key_flags= keyinfo->flags;
10021   keyinfo->ext_key_part_map= 0;
10022 
10023   join_tab->hj_key= keyinfo;
10024 
10025   DBUG_RETURN(FALSE);
10026 }
10027 
10028 /*
10029   Check if a set of tables specified by used_tables can be accessed when
10030   we're doing scan on join_tab jtab.
10031 */
are_tables_local(JOIN_TAB * jtab,table_map used_tables)10032 static bool are_tables_local(JOIN_TAB *jtab, table_map used_tables)
10033 {
10034   if (jtab->bush_root_tab)
10035   {
10036     /*
10037       jtab is inside execution join nest. We may not refer to outside tables,
10038       except the const tables.
10039     */
10040     table_map local_tables= jtab->emb_sj_nest->nested_join->used_tables |
10041                             jtab->join->const_table_map |
10042                             OUTER_REF_TABLE_BIT;
10043     return !MY_TEST(used_tables & ~local_tables);
10044   }
10045 
10046   /*
10047     If we got here then jtab is at top level.
10048      - all other tables at top level are accessible,
10049      - tables in join nests are accessible too, because all their columns that
10050        are needed at top level will be unpacked when scanning the
10051        materialization table.
10052   */
10053   return TRUE;
10054 }
10055 
create_ref_for_key(JOIN * join,JOIN_TAB * j,KEYUSE * org_keyuse,bool allow_full_scan,table_map used_tables)10056 static bool create_ref_for_key(JOIN *join, JOIN_TAB *j,
10057                                KEYUSE *org_keyuse, bool allow_full_scan,
10058                                table_map used_tables)
10059 {
10060   uint keyparts, length, key;
10061   TABLE *table;
10062   KEY *keyinfo;
10063   KEYUSE *keyuse= org_keyuse;
10064   bool ftkey= (keyuse->keypart == FT_KEYPART);
10065   THD *thd= join->thd;
10066   DBUG_ENTER("create_ref_for_key");
10067 
10068   /*  Use best key from find_best */
10069   table= j->table;
10070   key= keyuse->key;
10071   if (!is_hash_join_key_no(key))
10072     keyinfo= table->key_info+key;
10073   else
10074   {
10075     if (create_hj_key_for_table(join, j, org_keyuse, used_tables))
10076       DBUG_RETURN(TRUE);
10077     keyinfo= j->hj_key;
10078   }
10079 
10080   if (ftkey)
10081   {
10082     Item_func_match *ifm=(Item_func_match *)keyuse->val;
10083 
10084     length=0;
10085     keyparts=1;
10086     ifm->join_key=1;
10087   }
10088   else
10089   {
10090     keyparts=length=0;
10091     uint found_part_ref_or_null= 0;
10092     /*
10093       Calculate length for the used key
10094       Stop if there is a missing key part or when we find second key_part
10095       with KEY_OPTIMIZE_REF_OR_NULL
10096     */
10097     do
10098     {
10099       if (!(~used_tables & keyuse->used_tables) &&
10100           (!keyuse->validity_ref || *keyuse->validity_ref) &&
10101 	  j->keyuse_is_valid_for_access_in_chosen_plan(join, keyuse))
10102       {
10103         if  (are_tables_local(j, keyuse->val->used_tables()))
10104         {
10105           if ((is_hash_join_key_no(key) && keyuse->keypart != NO_KEYPART) ||
10106               (!is_hash_join_key_no(key) && keyparts == keyuse->keypart &&
10107                !(found_part_ref_or_null & keyuse->optimize)))
10108           {
10109              length+= keyinfo->key_part[keyparts].store_length;
10110              keyparts++;
10111              found_part_ref_or_null|= keyuse->optimize & ~KEY_OPTIMIZE_EQ;
10112           }
10113         }
10114       }
10115       keyuse++;
10116     } while (keyuse->table == table && keyuse->key == key);
10117 
10118     if (!keyparts && allow_full_scan)
10119     {
10120       /* It's a LooseIndexScan strategy scanning whole index */
10121       j->type= JT_ALL;
10122       j->index= key;
10123       DBUG_RETURN(FALSE);
10124     }
10125 
10126     DBUG_ASSERT(length > 0);
10127     DBUG_ASSERT(keyparts != 0);
10128   } /* not ftkey */
10129 
10130   /* set up fieldref */
10131   j->ref.key_parts= keyparts;
10132   j->ref.key_length= length;
10133   j->ref.key= (int) key;
10134   if (!(j->ref.key_buff= (uchar*) thd->calloc(ALIGN_SIZE(length)*2)) ||
10135       !(j->ref.key_copy= (store_key**) thd->alloc((sizeof(store_key*) *
10136 						          (keyparts+1)))) ||
10137       !(j->ref.items=(Item**) thd->alloc(sizeof(Item*)*keyparts)) ||
10138       !(j->ref.cond_guards= (bool**) thd->alloc(sizeof(uint*)*keyparts)))
10139   {
10140     DBUG_RETURN(TRUE);
10141   }
10142   j->ref.key_buff2=j->ref.key_buff+ALIGN_SIZE(length);
10143   j->ref.key_err=1;
10144   j->ref.has_record= FALSE;
10145   j->ref.null_rejecting= 0;
10146   j->ref.disable_cache= FALSE;
10147   j->ref.null_ref_part= NO_REF_PART;
10148   j->ref.const_ref_part_map= 0;
10149   j->ref.uses_splitting= FALSE;
10150   keyuse=org_keyuse;
10151 
10152   store_key **ref_key= j->ref.key_copy;
10153   uchar *key_buff=j->ref.key_buff, *null_ref_key= 0;
10154   uint null_ref_part= NO_REF_PART;
10155   bool keyuse_uses_no_tables= TRUE;
10156   uint not_null_keyparts= 0;
10157   if (ftkey)
10158   {
10159     j->ref.items[0]=((Item_func*)(keyuse->val))->key_item();
10160     /* Predicates pushed down into subquery can't be used FT access */
10161     j->ref.cond_guards[0]= NULL;
10162     if (keyuse->used_tables)
10163       DBUG_RETURN(TRUE);                        // not supported yet. SerG
10164 
10165     j->type=JT_FT;
10166   }
10167   else
10168   {
10169     uint i;
10170     for (i=0 ; i < keyparts ; keyuse++,i++)
10171     {
10172       while (((~used_tables) & keyuse->used_tables) ||
10173              (keyuse->validity_ref && !(*keyuse->validity_ref)) ||
10174 	     !j->keyuse_is_valid_for_access_in_chosen_plan(join, keyuse) ||
10175              keyuse->keypart == NO_KEYPART ||
10176 	     (keyuse->keypart !=
10177               (is_hash_join_key_no(key) ?
10178                  keyinfo->key_part[i].field->field_index : i)) ||
10179              !are_tables_local(j, keyuse->val->used_tables()))
10180 	 keyuse++;                              	/* Skip other parts */
10181 
10182       uint maybe_null= MY_TEST(keyinfo->key_part[i].null_bit);
10183       j->ref.items[i]=keyuse->val;		// Save for cond removal
10184       j->ref.cond_guards[i]= keyuse->cond_guard;
10185 
10186       if (!keyuse->val->maybe_null || keyuse->null_rejecting)
10187         not_null_keyparts++;
10188       /*
10189         Set ref.null_rejecting to true only if we are going to inject a
10190         "keyuse->val IS NOT NULL" predicate.
10191       */
10192       Item *real= (keyuse->val)->real_item();
10193       if (keyuse->null_rejecting && (real->type() == Item::FIELD_ITEM) &&
10194           ((Item_field*)real)->field->maybe_null())
10195         j->ref.null_rejecting|= (key_part_map)1 << i;
10196 
10197       keyuse_uses_no_tables= keyuse_uses_no_tables && !keyuse->used_tables;
10198       j->ref.uses_splitting |= (keyuse->validity_ref != NULL);
10199       /*
10200         We don't want to compute heavy expressions in EXPLAIN, an example would
10201         select * from t1 where t1.key=(select thats very heavy);
10202 
10203         (select thats very heavy) => is a constant here
10204         eg: (select avg(order_cost) from orders) => constant but expensive
10205       */
10206       if (!keyuse->val->used_tables() && !thd->lex->describe)
10207       {					// Compare against constant
10208         store_key_item tmp(thd,
10209                            keyinfo->key_part[i].field,
10210                            key_buff + maybe_null,
10211                            maybe_null ?  key_buff : 0,
10212                            keyinfo->key_part[i].length,
10213                            keyuse->val,
10214                            FALSE);
10215         if (unlikely(thd->is_fatal_error))
10216           DBUG_RETURN(TRUE);
10217         tmp.copy();
10218         j->ref.const_ref_part_map |= key_part_map(1) << i ;
10219       }
10220       else
10221       {
10222         *ref_key++= get_store_key(thd,
10223                                   keyuse,join->const_table_map,
10224                                   &keyinfo->key_part[i],
10225                                   key_buff, maybe_null);
10226         if (!keyuse->val->used_tables())
10227           j->ref.const_ref_part_map |= key_part_map(1) << i ;
10228       }
10229       /*
10230 	Remember if we are going to use REF_OR_NULL
10231 	But only if field _really_ can be null i.e. we force JT_REF
10232 	instead of JT_REF_OR_NULL in case if field can't be null
10233       */
10234       if ((keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL) && maybe_null)
10235       {
10236 	null_ref_key= key_buff;
10237         null_ref_part= i;
10238       }
10239       key_buff+= keyinfo->key_part[i].store_length;
10240     }
10241   } /* not ftkey */
10242   *ref_key=0;				// end_marker
10243   if (j->type == JT_FT)
10244     DBUG_RETURN(0);
10245   ulong key_flags= j->table->actual_key_flags(keyinfo);
10246   if (j->type == JT_CONST)
10247     j->table->const_table= 1;
10248   else if (!((keyparts == keyinfo->user_defined_key_parts &&
10249               (
10250                 (key_flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME ||
10251                 /* Unique key and all keyparts are NULL rejecting */
10252                 ((key_flags & HA_NOSAME) && keyparts == not_null_keyparts)
10253               )) ||
10254               /* true only for extended keys */
10255               (keyparts > keyinfo->user_defined_key_parts &&
10256                MY_TEST(key_flags & HA_EXT_NOSAME) &&
10257                keyparts == keyinfo->ext_key_parts)
10258             ) ||
10259             null_ref_key)
10260   {
10261     /* Must read with repeat */
10262     j->type= null_ref_key ? JT_REF_OR_NULL : JT_REF;
10263     j->ref.null_ref_key= null_ref_key;
10264     j->ref.null_ref_part= null_ref_part;
10265   }
10266   else if (keyuse_uses_no_tables)
10267   {
10268     /*
10269       This happen if we are using a constant expression in the ON part
10270       of an LEFT JOIN.
10271       SELECT * FROM a LEFT JOIN b ON b.key=30
10272       Here we should not mark the table as a 'const' as a field may
10273       have a 'normal' value or a NULL value.
10274     */
10275     j->type=JT_CONST;
10276   }
10277   else
10278     j->type=JT_EQ_REF;
10279 
10280   j->read_record.unlock_row= (j->type == JT_EQ_REF)?
10281                              join_read_key_unlock_row : rr_unlock_row;
10282   DBUG_RETURN(0);
10283 }
10284 
10285 
10286 
10287 static store_key *
get_store_key(THD * thd,KEYUSE * keyuse,table_map used_tables,KEY_PART_INFO * key_part,uchar * key_buff,uint maybe_null)10288 get_store_key(THD *thd, KEYUSE *keyuse, table_map used_tables,
10289 	      KEY_PART_INFO *key_part, uchar *key_buff, uint maybe_null)
10290 {
10291   if (!((~used_tables) & keyuse->used_tables))		// if const item
10292   {
10293     return new store_key_const_item(thd,
10294 				    key_part->field,
10295 				    key_buff + maybe_null,
10296 				    maybe_null ? key_buff : 0,
10297 				    key_part->length,
10298 				    keyuse->val);
10299   }
10300   else if (keyuse->val->type() == Item::FIELD_ITEM ||
10301            (keyuse->val->type() == Item::REF_ITEM &&
10302 	    ((((Item_ref*)keyuse->val)->ref_type() == Item_ref::OUTER_REF &&
10303               (*(Item_ref**)((Item_ref*)keyuse->val)->ref)->ref_type() ==
10304               Item_ref::DIRECT_REF) ||
10305              ((Item_ref*)keyuse->val)->ref_type() == Item_ref::VIEW_REF) &&
10306             keyuse->val->real_item()->type() == Item::FIELD_ITEM))
10307     return new store_key_field(thd,
10308 			       key_part->field,
10309 			       key_buff + maybe_null,
10310 			       maybe_null ? key_buff : 0,
10311 			       key_part->length,
10312 			       ((Item_field*) keyuse->val->real_item())->field,
10313 			       keyuse->val->real_item()->full_name());
10314 
10315   return new store_key_item(thd,
10316 			    key_part->field,
10317 			    key_buff + maybe_null,
10318 			    maybe_null ? key_buff : 0,
10319 			    key_part->length,
10320 			    keyuse->val, FALSE);
10321 }
10322 
10323 
add_cond_and_fix(THD * thd,Item ** e1,Item * e2)10324 inline void add_cond_and_fix(THD *thd, Item **e1, Item *e2)
10325 {
10326   if (*e1)
10327   {
10328     if (!e2)
10329       return;
10330     Item *res;
10331     if ((res= new (thd->mem_root) Item_cond_and(thd, *e1, e2)))
10332     {
10333       res->fix_fields(thd, 0);
10334       res->update_used_tables();
10335       *e1= res;
10336     }
10337   }
10338   else
10339     *e1= e2;
10340 }
10341 
10342 
10343 /**
10344   Add to join_tab->select_cond[i] "table.field IS NOT NULL" conditions
10345   we've inferred from ref/eq_ref access performed.
10346 
10347     This function is a part of "Early NULL-values filtering for ref access"
10348     optimization.
10349 
10350     Example of this optimization:
10351     For query SELECT * FROM t1,t2 WHERE t2.key=t1.field @n
10352     and plan " any-access(t1), ref(t2.key=t1.field) " @n
10353     add "t1.field IS NOT NULL" to t1's table condition. @n
10354 
10355     Description of the optimization:
10356 
10357       We look through equalities choosen to perform ref/eq_ref access,
10358       pick equalities that have form "tbl.part_of_key = othertbl.field"
10359       (where othertbl is a non-const table and othertbl.field may be NULL)
10360       and add them to conditions on correspoding tables (othertbl in this
10361       example).
10362 
10363       Exception from that is the case when referred_tab->join != join.
10364       I.e. don't add NOT NULL constraints from any embedded subquery.
10365       Consider this query:
10366       @code
10367       SELECT A.f2 FROM t1 LEFT JOIN t2 A ON A.f2 = f1
10368       WHERE A.f3=(SELECT MIN(f3) FROM  t2 C WHERE A.f4 = C.f4) OR A.f3 IS NULL;
10369       @endocde
10370       Here condition A.f3 IS NOT NULL is going to be added to the WHERE
10371       condition of the embedding query.
10372       Another example:
10373       SELECT * FROM t10, t11 WHERE (t10.a < 10 OR t10.a IS NULL)
10374       AND t11.b <=> t10.b AND (t11.a = (SELECT MAX(a) FROM t12
10375       WHERE t12.b = t10.a ));
10376       Here condition t10.a IS NOT NULL is going to be added.
10377       In both cases addition of NOT NULL condition will erroneously reject
10378       some rows of the result set.
10379       referred_tab->join != join constraint would disallow such additions.
10380 
10381       This optimization doesn't affect the choices that ref, range, or join
10382       optimizer make. This was intentional because this was added after 4.1
10383       was GA.
10384 
10385     Implementation overview
10386       1. update_ref_and_keys() accumulates info about null-rejecting
10387          predicates in in KEY_FIELD::null_rejecting
10388       1.1 add_key_part saves these to KEYUSE.
10389       2. create_ref_for_key copies them to TABLE_REF.
10390       3. add_not_null_conds adds "x IS NOT NULL" to join_tab->select_cond of
10391          appropiate JOIN_TAB members.
10392 */
10393 
add_not_null_conds(JOIN * join)10394 static void add_not_null_conds(JOIN *join)
10395 {
10396   JOIN_TAB *tab;
10397   DBUG_ENTER("add_not_null_conds");
10398 
10399   for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
10400        tab;
10401        tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
10402   {
10403     if (tab->type == JT_REF || tab->type == JT_EQ_REF ||
10404         tab->type == JT_REF_OR_NULL)
10405     {
10406       for (uint keypart= 0; keypart < tab->ref.key_parts; keypart++)
10407       {
10408         if (tab->ref.null_rejecting & ((key_part_map)1 << keypart))
10409         {
10410           Item *item= tab->ref.items[keypart];
10411           Item *notnull;
10412           Item *real= item->real_item();
10413 	  if (real->const_item() && real->type() != Item::FIELD_ITEM &&
10414               !real->is_expensive())
10415           {
10416             /*
10417               It could be constant instead of field after constant
10418               propagation.
10419             */
10420             continue;
10421           }
10422           DBUG_ASSERT(real->type() == Item::FIELD_ITEM);
10423           Item_field *not_null_item= (Item_field*)real;
10424           JOIN_TAB *referred_tab= not_null_item->field->table->reginfo.join_tab;
10425           /*
10426             For UPDATE queries such as:
10427             UPDATE t1 SET t1.f2=(SELECT MAX(t2.f4) FROM t2 WHERE t2.f3=t1.f1);
10428             not_null_item is the t1.f1, but it's referred_tab is 0.
10429           */
10430           if (!(notnull= new (join->thd->mem_root)
10431                 Item_func_isnotnull(join->thd, item)))
10432             DBUG_VOID_RETURN;
10433           /*
10434             We need to do full fix_fields() call here in order to have correct
10435             notnull->const_item(). This is needed e.g. by test_quick_select
10436             when it is called from make_join_select after this function is
10437             called.
10438           */
10439           if (notnull->fix_fields(join->thd, &notnull))
10440             DBUG_VOID_RETURN;
10441 
10442           DBUG_EXECUTE("where",print_where(notnull,
10443                                             (referred_tab ?
10444                                             referred_tab->table->alias.c_ptr() :
10445                                             "outer_ref_cond"),
10446                                             QT_ORDINARY););
10447           if (!tab->first_inner)
10448           {
10449             COND *new_cond= (referred_tab && referred_tab->join == join) ?
10450                               referred_tab->select_cond :
10451                               join->outer_ref_cond;
10452             add_cond_and_fix(join->thd, &new_cond, notnull);
10453             if (referred_tab && referred_tab->join == join)
10454               referred_tab->set_select_cond(new_cond, __LINE__);
10455             else
10456               join->outer_ref_cond= new_cond;
10457           }
10458           else
10459             add_cond_and_fix(join->thd, tab->first_inner->on_expr_ref, notnull);
10460         }
10461       }
10462     }
10463   }
10464   DBUG_VOID_RETURN;
10465 }
10466 
10467 /**
10468   Build a predicate guarded by match variables for embedding outer joins.
10469   The function recursively adds guards for predicate cond
10470   assending from tab to the first inner table  next embedding
10471   nested outer join and so on until it reaches root_tab
10472   (root_tab can be 0).
10473 
10474   In other words:
10475   add_found_match_trig_cond(tab->first_inner_tab, y, 0) is the way one should
10476   wrap parts of WHERE.  The idea is that the part of WHERE should be only
10477   evaluated after we've finished figuring out whether outer joins.
10478   ^^^ is the above correct?
10479 
10480   @param tab       the first inner table for most nested outer join
10481   @param cond      the predicate to be guarded (must be set)
10482   @param root_tab  the first inner table to stop
10483 
10484   @return
10485     -  pointer to the guarded predicate, if success
10486     -  0, otherwise
10487 */
10488 
10489 static COND*
add_found_match_trig_cond(THD * thd,JOIN_TAB * tab,COND * cond,JOIN_TAB * root_tab)10490 add_found_match_trig_cond(THD *thd, JOIN_TAB *tab, COND *cond,
10491                           JOIN_TAB *root_tab)
10492 {
10493   COND *tmp;
10494   DBUG_ASSERT(cond != 0);
10495   if (tab == root_tab)
10496     return cond;
10497   if ((tmp= add_found_match_trig_cond(thd, tab->first_upper, cond, root_tab)))
10498     tmp= new (thd->mem_root) Item_func_trig_cond(thd, tmp, &tab->found);
10499   if (tmp)
10500   {
10501     tmp->quick_fix_field();
10502     tmp->update_used_tables();
10503   }
10504   return tmp;
10505 }
10506 
10507 
is_active_sjm()10508 bool TABLE_LIST::is_active_sjm()
10509 {
10510   return sj_mat_info && sj_mat_info->is_used;
10511 }
10512 
10513 
10514 /**
10515   Fill in outer join related info for the execution plan structure.
10516 
10517     For each outer join operation left after simplification of the
10518     original query the function set up the following pointers in the linear
10519     structure join->join_tab representing the selected execution plan.
10520     The first inner table t0 for the operation is set to refer to the last
10521     inner table tk through the field t0->last_inner.
10522     Any inner table ti for the operation are set to refer to the first
10523     inner table ti->first_inner.
10524     The first inner table t0 for the operation is set to refer to the
10525     first inner table of the embedding outer join operation, if there is any,
10526     through the field t0->first_upper.
10527     The on expression for the outer join operation is attached to the
10528     corresponding first inner table through the field t0->on_expr_ref.
10529     Here ti are structures of the JOIN_TAB type.
10530 
10531     In other words, for each join tab, set
10532      - first_inner
10533      - last_inner
10534      - first_upper
10535      - on_expr_ref, cond_equal
10536 
10537   EXAMPLE. For the query:
10538   @code
10539         SELECT * FROM t1
10540                       LEFT JOIN
10541                       (t2, t3 LEFT JOIN t4 ON t3.a=t4.a)
10542                       ON (t1.a=t2.a AND t1.b=t3.b)
10543           WHERE t1.c > 5,
10544   @endcode
10545 
10546     given the execution plan with the table order t1,t2,t3,t4
10547     is selected, the following references will be set;
10548     t4->last_inner=[t4], t4->first_inner=[t4], t4->first_upper=[t2]
10549     t2->last_inner=[t4], t2->first_inner=t3->first_inner=[t2],
10550     on expression (t1.a=t2.a AND t1.b=t3.b) will be attached to
10551     *t2->on_expr_ref, while t3.a=t4.a will be attached to *t4->on_expr_ref.
10552 
10553   @param join   reference to the info fully describing the query
10554 
10555   @note
10556     The function assumes that the simplification procedure has been
10557     already applied to the join query (see simplify_joins).
10558     This function can be called only after the execution plan
10559     has been chosen.
10560 */
10561 
10562 static bool
make_outerjoin_info(JOIN * join)10563 make_outerjoin_info(JOIN *join)
10564 {
10565   DBUG_ENTER("make_outerjoin_info");
10566 
10567   /*
10568     Create temp. tables for merged SJ-Materialization nests. We need to do
10569     this now, because further code relies on tab->table and
10570     tab->table->pos_in_table_list being set.
10571   */
10572   JOIN_TAB *tab;
10573   for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
10574        tab;
10575        tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
10576   {
10577     if (tab->bush_children)
10578     {
10579       if (setup_sj_materialization_part1(tab))
10580         DBUG_RETURN(TRUE);
10581       tab->table->reginfo.join_tab= tab;
10582     }
10583   }
10584 
10585   for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
10586        tab;
10587        tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
10588   {
10589     TABLE *table= tab->table;
10590     TABLE_LIST *tbl= table->pos_in_table_list;
10591     TABLE_LIST *embedding= tbl->embedding;
10592 
10593     if (tbl->outer_join & (JOIN_TYPE_LEFT | JOIN_TYPE_RIGHT))
10594     {
10595       /*
10596         Table tab is the only one inner table for outer join.
10597         (Like table t4 for the table reference t3 LEFT JOIN t4 ON t3.a=t4.a
10598         is in the query above.)
10599       */
10600       tab->last_inner= tab->first_inner= tab;
10601       tab->on_expr_ref= &tbl->on_expr;
10602       tab->cond_equal= tbl->cond_equal;
10603       if (embedding && !embedding->is_active_sjm())
10604         tab->first_upper= embedding->nested_join->first_nested;
10605     }
10606     else if (!embedding)
10607       tab->table->reginfo.not_exists_optimize= 0;
10608 
10609     for ( ; embedding ; embedding= embedding->embedding)
10610     {
10611       if (embedding->is_active_sjm())
10612       {
10613         /* We're trying to walk out of an SJ-Materialization nest. Don't do this.  */
10614         break;
10615       }
10616       /* Ignore sj-nests: */
10617       if (!(embedding->on_expr && embedding->outer_join))
10618       {
10619         tab->table->reginfo.not_exists_optimize= 0;
10620         continue;
10621       }
10622       NESTED_JOIN *nested_join= embedding->nested_join;
10623       if (!nested_join->counter)
10624       {
10625         /*
10626           Table tab is the first inner table for nested_join.
10627           Save reference to it in the nested join structure.
10628         */
10629         nested_join->first_nested= tab;
10630         tab->on_expr_ref= &embedding->on_expr;
10631         tab->cond_equal= tbl->cond_equal;
10632         if (embedding->embedding)
10633           tab->first_upper= embedding->embedding->nested_join->first_nested;
10634       }
10635       if (!tab->first_inner)
10636         tab->first_inner= nested_join->first_nested;
10637       if (++nested_join->counter < nested_join->n_tables)
10638         break;
10639       /* Table tab is the last inner table for nested join. */
10640       nested_join->first_nested->last_inner= tab;
10641     }
10642   }
10643   DBUG_RETURN(FALSE);
10644 }
10645 
10646 
10647 /*
10648   @brief
10649     Build a temporary join prefix condition for JOIN_TABs up to the last tab
10650 
10651   @param  ret  OUT  the condition is returned here
10652 
10653   @return
10654      false  OK
10655      true   Out of memory
10656 
10657   @detail
10658     Walk through the join prefix (from the first table to the last_tab) and
10659     build a condition:
10660 
10661     join_tab_1_cond AND join_tab_2_cond AND ... AND last_tab_conds
10662 
10663     The condition is only intended to be used by the range optimizer, so:
10664     - it is not normalized (can have Item_cond_and inside another
10665       Item_cond_and)
10666     - it does not include join->exec_const_cond and other similar conditions.
10667 */
10668 
build_tmp_join_prefix_cond(JOIN * join,JOIN_TAB * last_tab,Item ** ret)10669 bool build_tmp_join_prefix_cond(JOIN *join, JOIN_TAB *last_tab, Item **ret)
10670 {
10671   THD *const thd= join->thd;
10672   Item_cond_and *all_conds= NULL;
10673 
10674   Item *res= NULL;
10675 
10676   // Pick the ON-expression. Use the same logic as in get_sargable_cond():
10677   if (last_tab->on_expr_ref)
10678     res= *last_tab->on_expr_ref;
10679   else if (last_tab->table->pos_in_table_list &&
10680            last_tab->table->pos_in_table_list->embedding &&
10681            !last_tab->table->pos_in_table_list->embedding->sj_on_expr)
10682   {
10683     res= last_tab->table->pos_in_table_list->embedding->on_expr;
10684   }
10685 
10686   for (JOIN_TAB *tab= first_depth_first_tab(join);
10687        tab;
10688        tab= next_depth_first_tab(join, tab))
10689   {
10690     if (tab->select_cond)
10691     {
10692       if (!res)
10693         res= tab->select_cond;
10694       else
10695       {
10696         if (!all_conds)
10697         {
10698           if (!(all_conds= new (thd->mem_root)Item_cond_and(thd, res,
10699                                                             tab->select_cond)))
10700             return true;
10701           res= all_conds;
10702         }
10703         else
10704           all_conds->add(tab->select_cond, thd->mem_root);
10705       }
10706     }
10707     if (tab == last_tab)
10708       break;
10709   }
10710   *ret= all_conds? all_conds: res;
10711   return false;
10712 }
10713 
10714 
10715 static bool
make_join_select(JOIN * join,SQL_SELECT * select,COND * cond)10716 make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
10717 {
10718   THD *thd= join->thd;
10719   DBUG_ENTER("make_join_select");
10720   if (select)
10721   {
10722     add_not_null_conds(join);
10723     table_map used_tables;
10724     /*
10725       Step #1: Extract constant condition
10726        - Extract and check the constant part of the WHERE
10727        - Extract constant parts of ON expressions from outer
10728          joins and attach them appropriately.
10729     */
10730     if (cond)                /* Because of QUICK_GROUP_MIN_MAX_SELECT */
10731     {                        /* there may be a select without a cond. */
10732       if (join->table_count > 1)
10733         cond->update_used_tables();		// Tablenr may have changed
10734 
10735       /*
10736         Extract expressions that depend on constant tables
10737         1. Const part of the join's WHERE clause can be checked immediately
10738            and if it is not satisfied then the join has empty result
10739         2. Constant parts of outer joins' ON expressions must be attached
10740            there inside the triggers.
10741       */
10742       {						// Check const tables
10743         join->exec_const_cond=
10744 	  make_cond_for_table(thd, cond,
10745                               join->const_table_map,
10746                               (table_map) 0, -1, FALSE, FALSE);
10747         /* Add conditions added by add_not_null_conds(). */
10748         for (uint i= 0 ; i < join->const_tables ; i++)
10749           add_cond_and_fix(thd, &join->exec_const_cond,
10750                            join->join_tab[i].select_cond);
10751 
10752         DBUG_EXECUTE("where",print_where(join->exec_const_cond,"constants",
10753 					 QT_ORDINARY););
10754         if (join->exec_const_cond && !join->exec_const_cond->is_expensive() &&
10755             !join->exec_const_cond->val_int())
10756         {
10757           DBUG_PRINT("info",("Found impossible WHERE condition"));
10758           join->exec_const_cond= NULL;
10759           DBUG_RETURN(1);	 // Impossible const condition
10760         }
10761 
10762         if (join->table_count != join->const_tables)
10763         {
10764           COND *outer_ref_cond= make_cond_for_table(thd, cond,
10765                                                     join->const_table_map |
10766                                                     OUTER_REF_TABLE_BIT,
10767                                                     OUTER_REF_TABLE_BIT,
10768                                                     -1, FALSE, FALSE);
10769           if (outer_ref_cond)
10770           {
10771             add_cond_and_fix(thd, &outer_ref_cond, join->outer_ref_cond);
10772             join->outer_ref_cond= outer_ref_cond;
10773           }
10774         }
10775         else
10776         {
10777           COND *pseudo_bits_cond=
10778             make_cond_for_table(thd, cond,
10779                                 join->const_table_map |
10780                                 PSEUDO_TABLE_BITS,
10781                                 PSEUDO_TABLE_BITS,
10782                                 -1, FALSE, FALSE);
10783           if (pseudo_bits_cond)
10784           {
10785             add_cond_and_fix(thd, &pseudo_bits_cond,
10786                              join->pseudo_bits_cond);
10787             join->pseudo_bits_cond= pseudo_bits_cond;
10788           }
10789         }
10790       }
10791     }
10792 
10793     /*
10794       Step #2: Extract WHERE/ON parts
10795     */
10796     uint i;
10797     for (i= join->top_join_tab_count - 1; i >= join->const_tables; i--)
10798     {
10799       if (!join->join_tab[i].bush_children)
10800         break;
10801     }
10802     uint last_top_base_tab_idx= i;
10803 
10804     table_map save_used_tables= 0;
10805     used_tables=((select->const_tables=join->const_table_map) |
10806 		 OUTER_REF_TABLE_BIT | RAND_TABLE_BIT);
10807     JOIN_TAB *tab;
10808     table_map current_map;
10809     i= join->const_tables;
10810     for (tab= first_depth_first_tab(join); tab;
10811          tab= next_depth_first_tab(join, tab))
10812     {
10813       bool is_hj;
10814 
10815       /*
10816         first_inner is the X in queries like:
10817         SELECT * FROM t1 LEFT OUTER JOIN (t2 JOIN t3) ON X
10818       */
10819       JOIN_TAB *first_inner_tab= tab->first_inner;
10820 
10821       if (!tab->bush_children)
10822         current_map= tab->table->map;
10823       else
10824         current_map= tab->bush_children->start->emb_sj_nest->sj_inner_tables;
10825 
10826       bool use_quick_range=0;
10827       COND *tmp;
10828 
10829       /*
10830         Tables that are within SJ-Materialization nests cannot have their
10831         conditions referring to preceding non-const tables.
10832          - If we're looking at the first SJM table, reset used_tables
10833            to refer to only allowed tables
10834       */
10835       if (tab->emb_sj_nest && tab->emb_sj_nest->sj_mat_info &&
10836           tab->emb_sj_nest->sj_mat_info->is_used &&
10837           !(used_tables & tab->emb_sj_nest->sj_inner_tables))
10838       {
10839         save_used_tables= used_tables;
10840         used_tables= join->const_table_map | OUTER_REF_TABLE_BIT |
10841                      RAND_TABLE_BIT;
10842       }
10843 
10844       used_tables|=current_map;
10845 
10846       if (tab->type == JT_REF && tab->quick &&
10847 	  (((uint) tab->ref.key == tab->quick->index &&
10848 	    tab->ref.key_length < tab->quick->max_used_key_length) ||
10849            (!is_hash_join_key_no(tab->ref.key) &&
10850             tab->table->intersect_keys.is_set(tab->ref.key))))
10851       {
10852 	/* Range uses longer key;  Use this instead of ref on key */
10853 	tab->type=JT_ALL;
10854 	use_quick_range=1;
10855 	tab->use_quick=1;
10856         tab->ref.key= -1;
10857 	tab->ref.key_parts=0;		// Don't use ref key.
10858 	join->best_positions[i].records_read= rows2double(tab->quick->records);
10859         /*
10860           We will use join cache here : prevent sorting of the first
10861           table only and sort at the end.
10862         */
10863         if (i != join->const_tables &&
10864             join->table_count > join->const_tables + 1 &&
10865             join->best_positions[i].use_join_buffer)
10866           join->full_join= 1;
10867       }
10868 
10869       tmp= NULL;
10870 
10871       if (cond)
10872       {
10873         if (tab->bush_children)
10874         {
10875           // Reached the materialization tab
10876           tmp= make_cond_after_sjm(thd, cond, cond, save_used_tables,
10877                                    used_tables, /*inside_or_clause=*/FALSE);
10878           used_tables= save_used_tables | used_tables;
10879           save_used_tables= 0;
10880         }
10881         else
10882         {
10883           tmp= make_cond_for_table(thd, cond, used_tables, current_map, i,
10884                                    FALSE, FALSE);
10885           if (tab == join->join_tab + last_top_base_tab_idx)
10886           {
10887             /*
10888               This pushes conjunctive conditions of WHERE condition such that:
10889               - their used_tables() contain RAND_TABLE_BIT
10890               - the conditions does not refer to any fields
10891               (such like rand() > 0.5)
10892             */
10893             table_map rand_table_bit= (table_map) RAND_TABLE_BIT;
10894             COND *rand_cond= make_cond_for_table(thd, cond, used_tables,
10895                                                  rand_table_bit, -1,
10896                                                  FALSE, FALSE);
10897             add_cond_and_fix(thd, &tmp, rand_cond);
10898           }
10899         }
10900         /* Add conditions added by add_not_null_conds(). */
10901         if (tab->select_cond)
10902           add_cond_and_fix(thd, &tmp, tab->select_cond);
10903       }
10904 
10905       is_hj= (tab->type == JT_REF || tab->type == JT_EQ_REF) &&
10906              (join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT) &&
10907 	     ((join->max_allowed_join_cache_level+1)/2 == 2 ||
10908               ((join->max_allowed_join_cache_level+1)/2 > 2 &&
10909 	       is_hash_join_key_no(tab->ref.key))) &&
10910               (!tab->emb_sj_nest ||
10911                join->allowed_semijoin_with_cache) &&
10912               (!(tab->table->map & join->outer_join) ||
10913                join->allowed_outer_join_with_cache);
10914 
10915       if (cond && !tmp && tab->quick)
10916       {						// Outer join
10917         if (tab->type != JT_ALL && !is_hj)
10918         {
10919           /*
10920             Don't use the quick method
10921             We come here in the case where we have 'key=constant' and
10922             the test is removed by make_cond_for_table()
10923           */
10924           delete tab->quick;
10925           tab->quick= 0;
10926         }
10927         else
10928         {
10929           /*
10930             Hack to handle the case where we only refer to a table
10931             in the ON part of an OUTER JOIN. In this case we want the code
10932             below to check if we should use 'quick' instead.
10933           */
10934           DBUG_PRINT("info", ("Item_int"));
10935           tmp= new (thd->mem_root) Item_int(thd, (longlong) 1, 1); // Always true
10936         }
10937 
10938       }
10939       if (tmp || !cond || tab->type == JT_REF || tab->type == JT_REF_OR_NULL ||
10940           tab->type == JT_EQ_REF || first_inner_tab)
10941       {
10942         DBUG_EXECUTE("where",print_where(tmp,
10943                                          tab->table? tab->table->alias.c_ptr() :"sjm-nest",
10944                                          QT_ORDINARY););
10945 	SQL_SELECT *sel= tab->select= ((SQL_SELECT*)
10946                                        thd->memdup((uchar*) select,
10947                                                    sizeof(*select)));
10948 	if (!sel)
10949 	  DBUG_RETURN(1);			// End of memory
10950         /*
10951           If tab is an inner table of an outer join operation,
10952           add a match guard to the pushed down predicate.
10953           The guard will turn the predicate on only after
10954           the first match for outer tables is encountered.
10955 	*/
10956         if (cond && tmp)
10957         {
10958           /*
10959             Because of QUICK_GROUP_MIN_MAX_SELECT there may be a select without
10960             a cond, so neutralize the hack above.
10961           */
10962           COND *tmp_cond;
10963           if (!(tmp_cond= add_found_match_trig_cond(thd, first_inner_tab, tmp,
10964                                                     0)))
10965             DBUG_RETURN(1);
10966           sel->cond= tmp_cond;
10967           tab->set_select_cond(tmp_cond, __LINE__);
10968           /* Push condition to storage engine if this is enabled
10969              and the condition is not guarded */
10970           if (tab->table)
10971           {
10972             tab->table->file->pushed_cond= NULL;
10973             if ((tab->table->file->ha_table_flags() &
10974                   HA_CAN_TABLE_CONDITION_PUSHDOWN) &&
10975                 !first_inner_tab)
10976             {
10977               COND *push_cond=
10978               make_cond_for_table(thd, tmp_cond, current_map, current_map,
10979                                   -1, FALSE, FALSE);
10980               if (push_cond)
10981               {
10982                 /* Push condition to handler */
10983                 if (!tab->table->file->cond_push(push_cond))
10984                   tab->table->file->pushed_cond= push_cond;
10985               }
10986             }
10987           }
10988         }
10989         else
10990         {
10991           sel->cond= NULL;
10992           tab->set_select_cond(NULL, __LINE__);
10993         }
10994 
10995 	sel->head=tab->table;
10996         DBUG_EXECUTE("where",
10997                      print_where(tmp,
10998                                  tab->table ? tab->table->alias.c_ptr() :
10999                                    "(sjm-nest)",
11000                                  QT_ORDINARY););
11001 	if (tab->quick)
11002 	{
11003 	  /* Use quick key read if it's a constant and it's not used
11004 	     with key reading */
11005           if ((tab->needed_reg.is_clear_all() && tab->type != JT_EQ_REF &&
11006               tab->type != JT_FT &&
11007               ((tab->type != JT_CONST && tab->type != JT_REF) ||
11008                (uint) tab->ref.key == tab->quick->index)) || is_hj)
11009           {
11010             DBUG_ASSERT(tab->quick->is_valid());
11011 	    sel->quick=tab->quick;		// Use value from get_quick_...
11012 	    sel->quick_keys.clear_all();
11013 	    sel->needed_reg.clear_all();
11014 	  }
11015 	  else
11016 	  {
11017 	    delete tab->quick;
11018 	  }
11019 	  tab->quick=0;
11020 	}
11021 	uint ref_key= sel->head? (uint) sel->head->reginfo.join_tab->ref.key+1 : 0;
11022 	if (i == join->const_tables && ref_key)
11023 	{
11024 	  if (!tab->const_keys.is_clear_all() &&
11025               tab->table->reginfo.impossible_range)
11026 	    DBUG_RETURN(1);
11027 	}
11028 	else if (tab->type == JT_ALL && ! use_quick_range)
11029 	{
11030 	  if (!tab->const_keys.is_clear_all() &&
11031 	      tab->table->reginfo.impossible_range)
11032 	    DBUG_RETURN(1);				// Impossible range
11033 	  /*
11034 	    We plan to scan all rows.
11035 	    Check again if we should use an index.
11036 
11037             There are two cases:
11038             1) There could be an index usage the refers to a previous
11039                table that we didn't consider before, but could be consider
11040                now as a "last resort". For example
11041                SELECT * from t1,t2 where t1.a between t2.a and t2.b;
11042             2) If the current table is the first non const table
11043                and there is a limit it still possibly beneficial
11044                to use the index even if the index range is big as
11045                we can stop when we've found limit rows.
11046 
11047             (1) - Don't switch the used index if we are using semi-join
11048                   LooseScan on this table. Using different index will not
11049                   produce the desired ordering and de-duplication.
11050 	  */
11051 
11052 	  if (!tab->table->is_filled_at_execution() &&
11053               !tab->loosescan_match_tab &&              // (1)
11054               ((cond && (!tab->keys.is_subset(tab->const_keys) && i > 0)) ||
11055                (!tab->const_keys.is_clear_all() && i == join->const_tables &&
11056                 join->unit->select_limit_cnt <
11057                 join->best_positions[i].records_read &&
11058                 !(join->select_options & OPTION_FOUND_ROWS))))
11059 	  {
11060 	    /* Join with outer join condition */
11061 	    COND *orig_cond=sel->cond;
11062 
11063             if (build_tmp_join_prefix_cond(join, tab, &sel->cond))
11064               return true;
11065 
11066 	    /*
11067               We can't call sel->cond->fix_fields,
11068               as it will break tab->on_expr if it's AND condition
11069               (fix_fields currently removes extra AND/OR levels).
11070               Yet attributes of the just built condition are not needed.
11071               Thus we call sel->cond->quick_fix_field for safety.
11072 	    */
11073 	    if (sel->cond && !sel->cond->fixed)
11074 	      sel->cond->quick_fix_field();
11075 
11076 	    if (sel->test_quick_select(thd, tab->keys,
11077 				       ((used_tables & ~ current_map) |
11078                                         OUTER_REF_TABLE_BIT),
11079 				       (join->select_options &
11080 					OPTION_FOUND_ROWS ?
11081 					HA_POS_ERROR :
11082 					join->unit->select_limit_cnt), 0,
11083                                         FALSE, FALSE) < 0)
11084             {
11085 	      /*
11086 		Before reporting "Impossible WHERE" for the whole query
11087 		we have to check isn't it only "impossible ON" instead
11088 	      */
11089               sel->cond=orig_cond;
11090               if (!*tab->on_expr_ref ||
11091                   sel->test_quick_select(thd, tab->keys,
11092                                          used_tables & ~ current_map,
11093                                          (join->select_options &
11094                                           OPTION_FOUND_ROWS ?
11095                                           HA_POS_ERROR :
11096                                           join->unit->select_limit_cnt),0,
11097                                           FALSE, FALSE) < 0)
11098 		DBUG_RETURN(1);			// Impossible WHERE
11099             }
11100             else
11101 	      sel->cond=orig_cond;
11102 
11103 	    /* Fix for EXPLAIN */
11104 	    if (sel->quick)
11105 	      join->best_positions[i].records_read= (double)sel->quick->records;
11106 	  }
11107 	  else
11108 	  {
11109 	    sel->needed_reg=tab->needed_reg;
11110 	  }
11111 	  sel->quick_keys= tab->table->quick_keys;
11112 	  if (!sel->quick_keys.is_subset(tab->checked_keys) ||
11113               !sel->needed_reg.is_subset(tab->checked_keys))
11114 	  {
11115             /*
11116               "Range checked for each record" is a "last resort" access method
11117               that should only be used when the other option is a cross-product
11118               join.
11119 
11120               We use the following condition (it's approximate):
11121               1. There are potential keys for (sel->needed_reg)
11122               2. There were no possible ways to construct a quick select, or
11123                  the quick select would be more expensive than the full table
11124                  scan.
11125             */
11126 	    tab->use_quick= (!sel->needed_reg.is_clear_all() &&
11127 			     (sel->quick_keys.is_clear_all() ||
11128                               (sel->quick &&
11129                                sel->quick->read_time >
11130                                tab->table->file->scan_time() +
11131                                tab->table->file->stats.records/TIME_FOR_COMPARE
11132                                ))) ?
11133 	      2 : 1;
11134 	    sel->read_tables= used_tables & ~current_map;
11135             sel->quick_keys.clear_all();
11136 	  }
11137 	  if (i != join->const_tables && tab->use_quick != 2 &&
11138               !tab->first_inner)
11139 	  {					/* Read with cache */
11140             /*
11141               TODO: the execution also gets here when we will not be using
11142               join buffer. Review these cases and perhaps, remove this call.
11143               (The final decision whether to use join buffer is made in
11144               check_join_cache_usage, so we should only call make_scan_filter()
11145               there, too).
11146             */
11147             if (tab->make_scan_filter())
11148               DBUG_RETURN(1);
11149           }
11150 	}
11151       }
11152 
11153       /*
11154         Push down conditions from all ON expressions.
11155         Each of these conditions are guarded by a variable
11156         that turns if off just before null complemented row for
11157         outer joins is formed. Thus, the condition from an
11158         'on expression' are guaranteed not to be checked for
11159         the null complemented row.
11160       */
11161 
11162       /*
11163         First push down constant conditions from ON expressions.
11164          - Each pushed-down condition is wrapped into trigger which is
11165            enabled only for non-NULL-complemented record
11166          - The condition is attached to the first_inner_table.
11167 
11168         With regards to join nests:
11169          - if we start at top level, don't walk into nests
11170          - if we start inside a nest, stay within that nest.
11171       */
11172       JOIN_TAB *start_from= tab->bush_root_tab?
11173                                tab->bush_root_tab->bush_children->start :
11174                                join->join_tab + join->const_tables;
11175       JOIN_TAB *end_with= tab->bush_root_tab?
11176                                tab->bush_root_tab->bush_children->end :
11177                                join->join_tab + join->top_join_tab_count;
11178       for (JOIN_TAB *join_tab= start_from;
11179            join_tab != end_with;
11180            join_tab++)
11181       {
11182         if (*join_tab->on_expr_ref)
11183         {
11184           JOIN_TAB *cond_tab= join_tab->first_inner;
11185           COND *tmp_cond= make_cond_for_table(thd, *join_tab->on_expr_ref,
11186                                               join->const_table_map,
11187                                               (table_map) 0, -1, FALSE, FALSE);
11188           if (!tmp_cond)
11189             continue;
11190           tmp_cond= new (thd->mem_root) Item_func_trig_cond(thd, tmp_cond,
11191                                             &cond_tab->not_null_compl);
11192           if (!tmp_cond)
11193             DBUG_RETURN(1);
11194           tmp_cond->quick_fix_field();
11195           cond_tab->select_cond= !cond_tab->select_cond ? tmp_cond :
11196                                  new (thd->mem_root) Item_cond_and(thd, cond_tab->select_cond,
11197                                                    tmp_cond);
11198           if (!cond_tab->select_cond)
11199 	    DBUG_RETURN(1);
11200           cond_tab->select_cond->quick_fix_field();
11201           cond_tab->select_cond->update_used_tables();
11202           if (cond_tab->select)
11203             cond_tab->select->cond= cond_tab->select_cond;
11204         }
11205       }
11206 
11207 
11208       /* Push down non-constant conditions from ON expressions */
11209       JOIN_TAB *last_tab= tab;
11210 
11211       /*
11212         while we're inside of an outer join and last_tab is
11213         the last of its tables ...
11214       */
11215       while (first_inner_tab && first_inner_tab->last_inner == last_tab)
11216       {
11217         /*
11218           Table tab is the last inner table of an outer join.
11219           An on expression is always attached to it.
11220 	*/
11221         COND *on_expr= *first_inner_tab->on_expr_ref;
11222 
11223         table_map used_tables2= (join->const_table_map |
11224                                  OUTER_REF_TABLE_BIT | RAND_TABLE_BIT);
11225 
11226         start_from= tab->bush_root_tab?
11227                       tab->bush_root_tab->bush_children->start :
11228                       join->join_tab + join->const_tables;
11229         for (JOIN_TAB *inner_tab= start_from;
11230              inner_tab <= last_tab;
11231              inner_tab++)
11232         {
11233           DBUG_ASSERT(inner_tab->table);
11234           current_map= inner_tab->table->map;
11235           used_tables2|= current_map;
11236           /*
11237             psergey: have put the -1 below. It's bad, will need to fix it.
11238           */
11239           COND *tmp_cond= make_cond_for_table(thd, on_expr, used_tables2,
11240                                               current_map,
11241                                               /*(inner_tab - first_tab)*/ -1,
11242 					      FALSE, FALSE);
11243           if (tab == last_tab)
11244           {
11245             /*
11246               This pushes conjunctive conditions of ON expression of an outer
11247               join such that:
11248               - their used_tables() contain RAND_TABLE_BIT
11249               - the conditions does not refer to any fields
11250               (such like rand() > 0.5)
11251             */
11252             table_map rand_table_bit= (table_map) RAND_TABLE_BIT;
11253             COND *rand_cond= make_cond_for_table(thd, on_expr, used_tables2,
11254                                                  rand_table_bit, -1,
11255                                                  FALSE, FALSE);
11256             add_cond_and_fix(thd, &tmp_cond, rand_cond);
11257           }
11258           bool is_sjm_lookup_tab= FALSE;
11259           if (inner_tab->bush_children)
11260           {
11261             /*
11262               'inner_tab' is an SJ-Materialization tab, i.e. we have a join
11263               order like this:
11264 
11265                 ot1 sjm_tab LEFT JOIN ot2 ot3
11266                          ^          ^
11267                    'tab'-+          +--- left join we're adding triggers for
11268 
11269               LEFT JOIN's ON expression may not have references to subquery
11270               columns.  The subquery was in the WHERE clause, so IN-equality
11271               is in the WHERE clause, also.
11272               However, equality propagation code may have propagated the
11273               IN-equality into ON expression, and we may get things like
11274 
11275                 subquery_inner_table=const
11276 
11277               in the ON expression. We must not check such conditions during
11278               SJM-lookup, because 1) subquery_inner_table has no valid current
11279               row (materialization temp.table has it instead), and 2) they
11280               would be true anyway.
11281             */
11282             SJ_MATERIALIZATION_INFO *sjm=
11283               inner_tab->bush_children->start->emb_sj_nest->sj_mat_info;
11284             if (sjm->is_used && !sjm->is_sj_scan)
11285               is_sjm_lookup_tab= TRUE;
11286           }
11287 
11288           if (inner_tab == first_inner_tab && inner_tab->on_precond &&
11289               !is_sjm_lookup_tab)
11290             add_cond_and_fix(thd, &tmp_cond, inner_tab->on_precond);
11291           if (tmp_cond && !is_sjm_lookup_tab)
11292           {
11293             JOIN_TAB *cond_tab=  (inner_tab < first_inner_tab ?
11294                                   first_inner_tab : inner_tab);
11295             Item **sel_cond_ref= (inner_tab < first_inner_tab ?
11296                                   &first_inner_tab->on_precond :
11297                                   &inner_tab->select_cond);
11298             /*
11299               First add the guards for match variables of
11300               all embedding outer join operations.
11301 	    */
11302             if (!(tmp_cond= add_found_match_trig_cond(thd,
11303                                                      cond_tab->first_inner,
11304                                                      tmp_cond,
11305                                                      first_inner_tab)))
11306               DBUG_RETURN(1);
11307             /*
11308               Now add the guard turning the predicate off for
11309               the null complemented row.
11310 	    */
11311             DBUG_PRINT("info", ("Item_func_trig_cond"));
11312             tmp_cond= new (thd->mem_root) Item_func_trig_cond(thd, tmp_cond,
11313                                               &first_inner_tab->
11314                                               not_null_compl);
11315             DBUG_PRINT("info", ("Item_func_trig_cond %p",
11316                                 tmp_cond));
11317             if (tmp_cond)
11318               tmp_cond->quick_fix_field();
11319 	    /* Add the predicate to other pushed down predicates */
11320             DBUG_PRINT("info", ("Item_cond_and"));
11321             *sel_cond_ref= !(*sel_cond_ref) ?
11322                              tmp_cond :
11323                              new (thd->mem_root) Item_cond_and(thd, *sel_cond_ref, tmp_cond);
11324             DBUG_PRINT("info", ("Item_cond_and %p",
11325                                 (*sel_cond_ref)));
11326             if (!(*sel_cond_ref))
11327               DBUG_RETURN(1);
11328             (*sel_cond_ref)->quick_fix_field();
11329             (*sel_cond_ref)->update_used_tables();
11330             if (cond_tab->select)
11331               cond_tab->select->cond= cond_tab->select_cond;
11332           }
11333         }
11334         first_inner_tab= first_inner_tab->first_upper;
11335       }
11336       if (!tab->bush_children)
11337         i++;
11338     }
11339   }
11340   DBUG_RETURN(0);
11341 }
11342 
11343 
11344 static
get_next_field_for_derived_key(uchar * arg)11345 uint get_next_field_for_derived_key(uchar *arg)
11346 {
11347   KEYUSE *keyuse= *(KEYUSE **) arg;
11348   if (!keyuse)
11349     return (uint) (-1);
11350   TABLE *table= keyuse->table;
11351   uint key= keyuse->key;
11352   uint fldno= keyuse->keypart;
11353   uint keypart= keyuse->keypart_map == (key_part_map) 1 ?
11354                                          0 : (keyuse-1)->keypart+1;
11355   for ( ;
11356         keyuse->table == table && keyuse->key == key && keyuse->keypart == fldno;
11357         keyuse++)
11358     keyuse->keypart= keypart;
11359   if (keyuse->key != key)
11360     keyuse= 0;
11361   *((KEYUSE **) arg)= keyuse;
11362   return fldno;
11363 }
11364 
11365 
11366 static
get_next_field_for_derived_key_simple(uchar * arg)11367 uint get_next_field_for_derived_key_simple(uchar *arg)
11368 {
11369   KEYUSE *keyuse= *(KEYUSE **) arg;
11370   if (!keyuse)
11371     return (uint) (-1);
11372   TABLE *table= keyuse->table;
11373   uint key= keyuse->key;
11374   uint fldno= keyuse->keypart;
11375   for ( ;
11376         keyuse->table == table && keyuse->key == key && keyuse->keypart == fldno;
11377         keyuse++)
11378     ;
11379   if (keyuse->key != key)
11380     keyuse= 0;
11381   *((KEYUSE **) arg)= keyuse;
11382   return fldno;
11383 }
11384 
11385 static
generate_derived_keys_for_table(KEYUSE * keyuse,uint count,uint keys)11386 bool generate_derived_keys_for_table(KEYUSE *keyuse, uint count, uint keys)
11387 {
11388   TABLE *table= keyuse->table;
11389   if (table->alloc_keys(keys))
11390     return TRUE;
11391   uint key_count= 0;
11392   KEYUSE *first_keyuse= keyuse;
11393   uint prev_part= keyuse->keypart;
11394   uint parts= 0;
11395   uint i= 0;
11396 
11397   for ( ; i < count && key_count < keys; )
11398   {
11399     do
11400     {
11401       keyuse->key= table->s->keys;
11402       keyuse->keypart_map= (key_part_map) (1 << parts);
11403       keyuse++;
11404       i++;
11405     }
11406     while (i < count && keyuse->used_tables == first_keyuse->used_tables &&
11407            keyuse->keypart == prev_part);
11408     parts++;
11409     if (i < count && keyuse->used_tables == first_keyuse->used_tables)
11410     {
11411       prev_part= keyuse->keypart;
11412     }
11413     else
11414     {
11415       KEYUSE *save_first_keyuse= first_keyuse;
11416       if (table->check_tmp_key(table->s->keys, parts,
11417                                get_next_field_for_derived_key_simple,
11418                                (uchar *) &first_keyuse))
11419 
11420       {
11421         first_keyuse= save_first_keyuse;
11422         if (table->add_tmp_key(table->s->keys, parts,
11423                                get_next_field_for_derived_key,
11424                                (uchar *) &first_keyuse,
11425                                FALSE))
11426           return TRUE;
11427         table->reginfo.join_tab->keys.set_bit(table->s->keys);
11428       }
11429       else
11430       {
11431         /* Mark keyuses for this key to be excluded */
11432         for (KEYUSE *curr=save_first_keyuse; curr < keyuse; curr++)
11433 	{
11434           curr->key= MAX_KEY;
11435         }
11436       }
11437       first_keyuse= keyuse;
11438       key_count++;
11439       parts= 0;
11440       prev_part= keyuse->keypart;
11441     }
11442   }
11443 
11444   return FALSE;
11445 }
11446 
11447 
11448 static
generate_derived_keys(DYNAMIC_ARRAY * keyuse_array)11449 bool generate_derived_keys(DYNAMIC_ARRAY *keyuse_array)
11450 {
11451   KEYUSE *keyuse= dynamic_element(keyuse_array, 0, KEYUSE*);
11452   uint elements= keyuse_array->elements;
11453   TABLE *prev_table= 0;
11454   for (uint i= 0; i < elements; i++, keyuse++)
11455   {
11456     if (!keyuse->table)
11457       break;
11458     KEYUSE *first_table_keyuse= NULL;
11459     table_map last_used_tables= 0;
11460     uint count= 0;
11461     uint keys= 0;
11462     TABLE_LIST *derived= NULL;
11463     if (keyuse->table != prev_table)
11464       derived= keyuse->table->pos_in_table_list;
11465     while (derived && derived->is_materialized_derived())
11466     {
11467       if (keyuse->table != prev_table)
11468       {
11469         prev_table= keyuse->table;
11470         while (keyuse->table == prev_table && keyuse->key != MAX_KEY)
11471 	{
11472           keyuse++;
11473           i++;
11474         }
11475         if (keyuse->table != prev_table)
11476 	{
11477           keyuse--;
11478           i--;
11479           derived= NULL;
11480           continue;
11481         }
11482         first_table_keyuse= keyuse;
11483         last_used_tables= keyuse->used_tables;
11484         count= 0;
11485         keys= 0;
11486       }
11487       else if (keyuse->used_tables != last_used_tables)
11488       {
11489         keys++;
11490         last_used_tables= keyuse->used_tables;
11491       }
11492       count++;
11493       keyuse++;
11494       i++;
11495       if (keyuse->table != prev_table)
11496       {
11497         if (generate_derived_keys_for_table(first_table_keyuse, count, ++keys))
11498           return TRUE;
11499         keyuse--;
11500         i--;
11501 	derived= NULL;
11502       }
11503     }
11504   }
11505   return FALSE;
11506 }
11507 
11508 
11509 /*
11510   @brief
11511   Drops unused keys for each materialized derived table/view
11512 
11513   @details
11514   For materialized derived tables only ref access can be used, it employs
11515   only one index, thus we don't need the rest. For each materialized derived
11516   table/view call TABLE::use_index to save one index chosen by the optimizer
11517   and free others. No key is chosen then all keys will be dropped.
11518 */
11519 
drop_unused_derived_keys()11520 void JOIN::drop_unused_derived_keys()
11521 {
11522   JOIN_TAB *tab;
11523   for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
11524        tab;
11525        tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
11526   {
11527 
11528     TABLE *tmp_tbl= tab->table;
11529     if (!tmp_tbl)
11530       continue;
11531     if (!tmp_tbl->pos_in_table_list->is_materialized_derived())
11532       continue;
11533     if (tmp_tbl->max_keys > 1 && !tab->is_ref_for_hash_join())
11534       tmp_tbl->use_index(tab->ref.key);
11535     if (tmp_tbl->s->keys)
11536     {
11537       if (tab->ref.key >= 0 && tab->ref.key < MAX_KEY)
11538         tab->ref.key= 0;
11539       else
11540         tmp_tbl->s->keys= 0;
11541     }
11542     tab->keys= (key_map) (tmp_tbl->s->keys ? 1 : 0);
11543   }
11544 }
11545 
11546 
11547 /*
11548   Evaluate the bitmap of used tables for items from the select list
11549 */
11550 
eval_select_list_used_tables()11551 inline void JOIN::eval_select_list_used_tables()
11552 {
11553   select_list_used_tables= 0;
11554   Item *item;
11555   List_iterator_fast<Item> it(fields_list);
11556   while ((item= it++))
11557   {
11558     select_list_used_tables|= item->used_tables();
11559   }
11560   Item_outer_ref *ref;
11561   List_iterator_fast<Item_outer_ref> ref_it(select_lex->inner_refs_list);
11562   while ((ref= ref_it++))
11563   {
11564     item= ref->outer_ref;
11565     select_list_used_tables|= item->used_tables();
11566   }
11567 }
11568 
11569 
11570 /*
11571   Determine {after which table we'll produce ordered set}
11572 
11573   SYNOPSIS
11574     make_join_orderinfo()
11575      join
11576 
11577 
11578   DESCRIPTION
11579     Determine if the set is already ordered for ORDER BY, so it can
11580     disable join cache because it will change the ordering of the results.
11581     Code handles sort table that is at any location (not only first after
11582     the const tables) despite the fact that it's currently prohibited.
11583     We must disable join cache if the first non-const table alone is
11584     ordered. If there is a temp table the ordering is done as a last
11585     operation and doesn't prevent join cache usage.
11586 
11587   RETURN
11588     Number of table after which the set will be ordered
11589     join->tables if we don't need an ordered set
11590 */
11591 
make_join_orderinfo(JOIN * join)11592 static uint make_join_orderinfo(JOIN *join)
11593 {
11594   /*
11595     This function needs to be fixed to take into account that we now have SJM
11596     nests.
11597   */
11598   DBUG_ASSERT(0);
11599 
11600   JOIN_TAB *tab;
11601   if (join->need_tmp)
11602     return join->table_count;
11603   tab= join->get_sort_by_join_tab();
11604   return tab ? (uint)(tab-join->join_tab) : join->table_count;
11605 }
11606 
11607 /*
11608   Deny usage of join buffer for the specified table
11609 
11610   SYNOPSIS
11611     set_join_cache_denial()
11612       tab    join table for which join buffer usage is to be denied
11613 
11614   DESCRIPTION
11615     The function denies usage of join buffer when joining the table 'tab'.
11616     The table is marked as not employing any join buffer. If a join cache
11617     object has been already allocated for the table this object is destroyed.
11618 
11619   RETURN
11620     none
11621 */
11622 
11623 static
set_join_cache_denial(JOIN_TAB * join_tab)11624 void set_join_cache_denial(JOIN_TAB *join_tab)
11625 {
11626   if (join_tab->cache)
11627   {
11628     /*
11629       If there is a previous cache linked to this cache through the
11630       next_cache pointer: remove the link.
11631     */
11632     if (join_tab->cache->prev_cache)
11633       join_tab->cache->prev_cache->next_cache= 0;
11634     /*
11635       Same for the next_cache
11636     */
11637     if (join_tab->cache->next_cache)
11638       join_tab->cache->next_cache->prev_cache= 0;
11639 
11640     join_tab->cache->free();
11641     join_tab->cache= 0;
11642   }
11643   if (join_tab->use_join_cache)
11644   {
11645     join_tab->use_join_cache= FALSE;
11646     join_tab->used_join_cache_level= 0;
11647     /*
11648       It could be only sub_select(). It could not be sub_seject_sjm because we
11649       don't do join buffering for the first table in sjm nest.
11650     */
11651     join_tab[-1].next_select= sub_select;
11652     if (join_tab->type == JT_REF && join_tab->is_ref_for_hash_join())
11653     {
11654       join_tab->type= JT_ALL;
11655       join_tab->ref.key_parts= 0;
11656     }
11657     join_tab->join->return_tab= join_tab;
11658   }
11659 }
11660 
11661 
11662 /**
11663   The default implementation of unlock-row method of READ_RECORD,
11664   used in all access methods.
11665 */
11666 
rr_unlock_row(st_join_table * tab)11667 void rr_unlock_row(st_join_table *tab)
11668 {
11669   READ_RECORD *info= &tab->read_record;
11670   info->table->file->unlock_row();
11671 }
11672 
11673 
11674 /**
11675   Pick the appropriate access method functions
11676 
11677   Sets the functions for the selected table access method
11678 
11679   @param      tab               Table reference to put access method
11680 */
11681 
11682 static void
pick_table_access_method(JOIN_TAB * tab)11683 pick_table_access_method(JOIN_TAB *tab)
11684 {
11685   switch (tab->type)
11686   {
11687   case JT_REF:
11688     tab->read_first_record= join_read_always_key;
11689     tab->read_record.read_record_func= join_read_next_same;
11690     break;
11691 
11692   case JT_REF_OR_NULL:
11693     tab->read_first_record= join_read_always_key_or_null;
11694     tab->read_record.read_record_func= join_read_next_same_or_null;
11695     break;
11696 
11697   case JT_CONST:
11698     tab->read_first_record= join_read_const;
11699     tab->read_record.read_record_func= join_no_more_records;
11700     break;
11701 
11702   case JT_EQ_REF:
11703     tab->read_first_record= join_read_key;
11704     tab->read_record.read_record_func= join_no_more_records;
11705     break;
11706 
11707   case JT_FT:
11708     tab->read_first_record= join_ft_read_first;
11709     tab->read_record.read_record_func= join_ft_read_next;
11710     break;
11711 
11712   case JT_SYSTEM:
11713     tab->read_first_record= join_read_system;
11714     tab->read_record.read_record_func= join_no_more_records;
11715     break;
11716 
11717   /* keep gcc happy */
11718   default:
11719     break;
11720   }
11721 }
11722 
11723 
11724 /*
11725   Revise usage of join buffer for the specified table and the whole nest
11726 
11727   SYNOPSIS
11728     revise_cache_usage()
11729       tab    join table for which join buffer usage is to be revised
11730 
11731   DESCRIPTION
11732     The function revise the decision to use a join buffer for the table 'tab'.
11733     If this table happened to be among the inner tables of a nested outer join/
11734     semi-join the functions denies usage of join buffers for all of them
11735 
11736   RETURN
11737     none
11738 */
11739 
11740 static
revise_cache_usage(JOIN_TAB * join_tab)11741 void revise_cache_usage(JOIN_TAB *join_tab)
11742 {
11743   JOIN_TAB *tab;
11744   JOIN_TAB *first_inner;
11745 
11746   if (join_tab->first_inner)
11747   {
11748     JOIN_TAB *end_tab= join_tab;
11749     for (first_inner= join_tab->first_inner;
11750          first_inner;
11751          first_inner= first_inner->first_upper)
11752     {
11753       for (tab= end_tab; tab >= first_inner; tab--)
11754         set_join_cache_denial(tab);
11755       end_tab= first_inner;
11756     }
11757   }
11758   else if (join_tab->first_sj_inner_tab)
11759   {
11760     first_inner= join_tab->first_sj_inner_tab;
11761     for (tab= join_tab; tab >= first_inner; tab--)
11762     {
11763       set_join_cache_denial(tab);
11764     }
11765   }
11766   else set_join_cache_denial(join_tab);
11767 }
11768 
11769 
11770 /*
11771   end_select-compatible function that writes the record into a sjm temptable
11772 
11773   SYNOPSIS
11774     end_sj_materialize()
11775       join            The join
11776       join_tab        Points to right after the last join_tab in materialization bush
11777       end_of_records  FALSE <=> This call is made to pass another record
11778                                 combination
11779                       TRUE  <=> EOF (no action)
11780 
11781   DESCRIPTION
11782     This function is used by semi-join materialization to capture suquery's
11783     resultset and write it into the temptable (that is, materialize it).
11784 
11785   NOTE
11786     This function is used only for semi-join materialization. Non-semijoin
11787     materialization uses different mechanism.
11788 
11789   RETURN
11790     NESTED_LOOP_OK
11791     NESTED_LOOP_ERROR
11792 */
11793 
11794 enum_nested_loop_state
end_sj_materialize(JOIN * join,JOIN_TAB * join_tab,bool end_of_records)11795 end_sj_materialize(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
11796 {
11797   int error;
11798   THD *thd= join->thd;
11799   SJ_MATERIALIZATION_INFO *sjm= join_tab[-1].emb_sj_nest->sj_mat_info;
11800   DBUG_ENTER("end_sj_materialize");
11801   if (!end_of_records)
11802   {
11803     TABLE *table= sjm->table;
11804 
11805     List_iterator<Item> it(sjm->sjm_table_cols);
11806     Item *item;
11807     while ((item= it++))
11808     {
11809       if (item->is_null())
11810         DBUG_RETURN(NESTED_LOOP_OK);
11811     }
11812     fill_record(thd, table, table->field, sjm->sjm_table_cols, TRUE, FALSE);
11813     if (unlikely(thd->is_error()))
11814       DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
11815     if (unlikely((error= table->file->ha_write_tmp_row(table->record[0]))))
11816     {
11817       /* create_myisam_from_heap will generate error if needed */
11818       if (table->file->is_fatal_error(error, HA_CHECK_DUP) &&
11819           create_internal_tmp_table_from_heap(thd, table,
11820                                               sjm->sjm_table_param.start_recinfo,
11821                                               &sjm->sjm_table_param.recinfo, error, 1, NULL))
11822         DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
11823     }
11824   }
11825   DBUG_RETURN(NESTED_LOOP_OK);
11826 }
11827 
11828 
11829 /*
11830   Check whether a join buffer can be used to join the specified table
11831 
11832   SYNOPSIS
11833     check_join_cache_usage()
      tab                 joined table to check join buffer usage for
      options             options of the join
      no_jbuf_after       don't use join buffering after table with this number
      table_index         number of the joined table (it is compared with
                          no_jbuf_after)
      prev_tab            previous join table
11838 
11839   DESCRIPTION
11840     The function finds out whether the table 'tab' can be joined using a join
11841     buffer. This check is performed after the best execution plan for 'join'
11842     has been chosen. If the function decides that a join buffer can be employed
11843     then it selects the most appropriate join cache object that contains this
11844     join buffer.
    The result of the check and the type of the join buffer to be used
    depend on:
11847       - the access method to access rows of the joined table
11848       - whether the join table is an inner table of an outer join or semi-join
11849       - whether the optimizer switches
11850           outer_join_with_cache, semijoin_with_cache, join_cache_incremental,
11851           join_cache_hashed, join_cache_bka,
11852         are set on or off
11853       - the join cache level set for the query
11854       - the join 'options'.
11855 
11856     In any case join buffer is not used if the number of the joined table is
11857     greater than 'no_jbuf_after'. It's also never used if the value of
11858     join_cache_level is equal to 0.
11859     If the optimizer switch outer_join_with_cache is off no join buffer is
11860     used for outer join operations.
11861     If the optimizer switch semijoin_with_cache is off no join buffer is used
11862     for semi-join operations.
11863     If the optimizer switch join_cache_incremental is off no incremental join
11864     buffers are used.
11865     If the optimizer switch join_cache_hashed is off then the optimizer uses
11866     neither BNLH algorithm, nor BKAH algorithm to perform join operations.
11867 
11868     If the optimizer switch join_cache_bka is off then the optimizer uses
11869     neither BKA algorithm, nor BKAH algorithm to perform join operation.
    The valid settings for join_cache_level lie in the interval 0..8.
    If it is set to 0 no join buffers are used to perform join operations.
    Currently we differentiate between join caches of 8 levels:
11873       1 : non-incremental join cache used for BNL join algorithm
11874       2 : incremental join cache used for BNL join algorithm
11875       3 : non-incremental join cache used for BNLH join algorithm
11876       4 : incremental join cache used for BNLH join algorithm
11877       5 : non-incremental join cache used for BKA join algorithm
11878       6 : incremental join cache used for BKA join algorithm
11879       7 : non-incremental join cache used for BKAH join algorithm
11880       8 : incremental join cache used for BKAH join algorithm
11881     If the value of join_cache_level is set to n then no join caches of
11882     levels higher than n can be employed.
11883 
11884     If the optimizer switches outer_join_with_cache, semijoin_with_cache,
11885     join_cache_incremental, join_cache_hashed, join_cache_bka are all on
11886     the following rules are applied.
11887     If join_cache_level==1|2 then join buffer is used for inner joins, outer
11888     joins and semi-joins with 'JT_ALL' access method. In this case a
11889     JOIN_CACHE_BNL object is employed.
    If join_cache_level==3|4 and a join buffer is used for a join operation
    (inner join, outer join, semi-join) with 'JT_REF'/'JT_EQ_REF' access method
    then a JOIN_CACHE_BNLH object is employed.
11893     If an index is used to access rows of the joined table and the value of
11894     join_cache_level==5|6 then a JOIN_CACHE_BKA object is employed.
11895     If an index is used to access rows of the joined table and the value of
11896     join_cache_level==7|8 then a JOIN_CACHE_BKAH object is employed.
11897     If the value of join_cache_level is odd then creation of a non-linked
11898     join cache is forced.
11899 
    Currently for any join operation a join cache of the highest allowed
    and applicable level is used.
11902     For example, if join_cache_level is set to 6 and the optimizer switch
11903     join_cache_bka is off, while the optimizer switch join_cache_hashed is
11904     on then for any inner join operation with JT_REF/JT_EQREF access method
11905     to the joined table the BNLH join algorithm will be used, while for
11906     the table accessed by the JT_ALL methods the BNL algorithm will be used.
11907 
11908     If the function decides that a join buffer can be used to join the table
11909     'tab' then it sets the value of tab->use_join_buffer to TRUE and assigns
11910     the selected join cache object to the field 'cache' of the previous
11911     join table.
11912     If the function creates a join cache object it tries to initialize it. The
11913     failure to do this results in an invocation of the function that destructs
11914     the created object.
    If the function decides that for some reason no join buffer can be used
    for a table it calls the function revise_cache_usage that checks
    whether join cache should be denied for some previous tables. In this case
    a pointer to the first table for which join cache usage has been denied
    is passed in join->return_tab (see the function set_join_cache_denial).
11920 
    The function changes the value of the fields tab->icp_other_tables_ok and
    tab->idx_cond_fact_out to FALSE if the chosen join cache algorithm
    requires it.
11924 
11925   NOTES
11926     An inner table of a nested outer join or a nested semi-join can be currently
11927     joined only when a linked cache object is employed. In these cases setting
11928     join_cache_incremental to 'off' results in denial of usage of any join
11929     buffer when joining the table.
11930     For a nested outer join/semi-join, currently, we either use join buffers for
11931     all inner tables or for none of them.
11932     Some engines (e.g. Falcon) currently allow to use only a join cache
11933     of the type JOIN_CACHE_BKAH when the joined table is accessed through
11934     an index. For these engines setting the value of join_cache_level to 5 or 6
11935     results in that no join buffer is used to join the table.
11936 
11937   RETURN VALUE
11938     cache level if cache is used, otherwise returns 0
11939 
11940   TODO
11941     Support BKA inside SJ-Materialization nests. When doing this, we'll need
11942     to only store sj-inner tables in the join buffer.
11943 #if 0
11944         JOIN_TAB *first_tab= join->join_tab+join->const_tables;
11945         uint n_tables= i-join->const_tables;
11946         / *
11947           We normally put all preceding tables into the join buffer, except
11948           for the constant tables.
11949           If we're inside a semi-join materialization nest, e.g.
11950 
11951              outer_tbl1  outer_tbl2  ( inner_tbl1, inner_tbl2 ) ...
11952                                                        ^-- we're here
11953 
11954           then we need to put into the join buffer only the tables from
11955           within the nest.
11956         * /
11957         if (i >= first_sjm_table && i < last_sjm_table)
11958         {
11959           n_tables= i - first_sjm_table; // will be >0 if we got here
11960           first_tab= join->join_tab + first_sjm_table;
11961         }
11962 #endif
11963 */
11964 
11965 static
check_join_cache_usage(JOIN_TAB * tab,ulonglong options,uint no_jbuf_after,uint table_index,JOIN_TAB * prev_tab)11966 uint check_join_cache_usage(JOIN_TAB *tab,
11967                             ulonglong options,
11968                             uint no_jbuf_after,
11969                             uint table_index,
11970                             JOIN_TAB *prev_tab)
11971 {
11972   Cost_estimate cost;
11973   uint flags= 0;
11974   ha_rows rows= 0;
11975   uint bufsz= 4096;
11976   JOIN_CACHE *prev_cache=0;
11977   JOIN *join= tab->join;
11978   MEM_ROOT *root= join->thd->mem_root;
11979   uint cache_level= tab->used_join_cache_level;
11980   bool force_unlinked_cache=
11981          !(join->allowed_join_cache_types & JOIN_CACHE_INCREMENTAL_BIT);
11982   bool no_hashed_cache=
11983          !(join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT);
11984   bool no_bka_cache=
11985          !(join->allowed_join_cache_types & JOIN_CACHE_BKA_BIT);
11986 
11987   join->return_tab= 0;
11988 
11989   /*
11990     Don't use join cache if @@join_cache_level==0 or this table is the first
11991     one join suborder (either at top level or inside a bush)
11992   */
11993   if (cache_level == 0 || !prev_tab)
11994     return 0;
11995 
11996   if (force_unlinked_cache && (cache_level%2 == 0))
11997     cache_level--;
11998 
11999   if (options & SELECT_NO_JOIN_CACHE)
12000     goto no_join_cache;
12001 
12002   if (tab->use_quick == 2)
12003     goto no_join_cache;
12004 
12005   if (tab->table->map & join->complex_firstmatch_tables)
12006     goto no_join_cache;
12007 
12008   /*
12009     Don't use join cache if we're inside a join tab range covered by LooseScan
12010     strategy (TODO: LooseScan is very similar to FirstMatch so theoretically it
12011     should be possible to use join buffering in the same way we're using it for
12012     multi-table firstmatch ranges).
12013   */
12014   if (tab->inside_loosescan_range)
12015     goto no_join_cache;
12016 
12017   if (tab->is_inner_table_of_semijoin() &&
12018       !join->allowed_semijoin_with_cache)
12019     goto no_join_cache;
12020   if (tab->is_inner_table_of_outer_join() &&
12021       !join->allowed_outer_join_with_cache)
12022     goto no_join_cache;
12023 
12024   /*
12025     Non-linked join buffers can't guarantee one match
12026   */
12027   if (tab->is_nested_inner())
12028   {
12029     if (force_unlinked_cache || cache_level == 1)
12030       goto no_join_cache;
12031     if (cache_level & 1)
12032       cache_level--;
12033   }
12034 
12035   /*
12036     Don't use BKA for materialized tables. We could actually have a
12037     meaningful use of BKA when linked join buffers are used.
12038 
12039     The problem is, the temp.table is not filled (actually not even opened
12040     properly) yet, and this doesn't let us call
12041     handler->multi_range_read_info(). It is possible to come up with
    estimates, etc. without accessing the table, but it does not seem worth the
    effort now.
12044   */
12045   if (tab->table->pos_in_table_list->is_materialized_derived())
12046   {
12047     no_bka_cache= true;
12048     /*
12049       Don't use hash join algorithm if the temporary table for the rows
12050       of the derived table will be created with an equi-join key.
12051     */
12052     if (tab->table->s->keys)
12053       no_hashed_cache= true;
12054   }
12055 
12056   /*
12057     Don't use join buffering if we're dictated not to by no_jbuf_after
12058     (This is not meaningfully used currently)
12059   */
12060   if (table_index > no_jbuf_after)
12061     goto no_join_cache;
12062 
12063   /*
12064     TODO: BNL join buffer should be perfectly ok with tab->bush_children.
12065   */
12066   if (tab->loosescan_match_tab || tab->bush_children)
12067     goto no_join_cache;
12068 
12069   for (JOIN_TAB *first_inner= tab->first_inner; first_inner;
12070        first_inner= first_inner->first_upper)
12071   {
12072     if (first_inner != tab &&
12073         (!first_inner->use_join_cache || !(tab-1)->use_join_cache))
12074       goto no_join_cache;
12075   }
12076   if (tab->first_sj_inner_tab && tab->first_sj_inner_tab != tab &&
12077       (!tab->first_sj_inner_tab->use_join_cache || !(tab-1)->use_join_cache))
12078     goto no_join_cache;
12079   if (!prev_tab->use_join_cache)
12080   {
12081     /*
12082       Check whether table tab and the previous one belong to the same nest of
12083       inner tables and if so do not use join buffer when joining table tab.
12084     */
12085     if (tab->first_inner && tab != tab->first_inner)
12086     {
12087       for (JOIN_TAB *first_inner= tab[-1].first_inner;
12088            first_inner;
12089            first_inner= first_inner->first_upper)
12090       {
12091         if (first_inner == tab->first_inner)
12092           goto no_join_cache;
12093       }
12094     }
12095     else if (tab->first_sj_inner_tab && tab != tab->first_sj_inner_tab &&
12096              tab->first_sj_inner_tab == tab[-1].first_sj_inner_tab)
12097       goto no_join_cache;
12098   }
12099 
12100   prev_cache= prev_tab->cache;
12101 
12102   switch (tab->type) {
12103   case JT_ALL:
12104     if (cache_level == 1)
12105       prev_cache= 0;
12106     if ((tab->cache= new (root) JOIN_CACHE_BNL(join, tab, prev_cache)))
12107     {
12108       tab->icp_other_tables_ok= FALSE;
12109       /* If make_join_select() hasn't called make_scan_filter(), do it now */
12110       if (!tab->cache_select && tab->make_scan_filter())
12111         goto no_join_cache;
12112       return (2 - MY_TEST(!prev_cache));
12113     }
12114     goto no_join_cache;
12115   case JT_SYSTEM:
12116   case JT_CONST:
12117   case JT_REF:
12118   case JT_EQ_REF:
12119     if (cache_level <=2 || (no_hashed_cache && no_bka_cache))
12120       goto no_join_cache;
12121     if (tab->ref.is_access_triggered())
12122       goto no_join_cache;
12123 
12124     if (!tab->is_ref_for_hash_join() && !no_bka_cache)
12125     {
12126       flags= HA_MRR_NO_NULL_ENDPOINTS | HA_MRR_SINGLE_POINT;
12127       if (tab->table->covering_keys.is_set(tab->ref.key))
12128         flags|= HA_MRR_INDEX_ONLY;
12129       rows= tab->table->file->multi_range_read_info(tab->ref.key, 10, 20,
12130                                                     tab->ref.key_parts,
12131                                                     &bufsz, &flags, &cost);
12132     }
12133 
12134     if ((cache_level <=4 && !no_hashed_cache) || no_bka_cache ||
12135         tab->is_ref_for_hash_join() ||
12136 	((flags & HA_MRR_NO_ASSOCIATION) && cache_level <=6))
12137     {
12138       if (!tab->hash_join_is_possible() ||
12139           tab->make_scan_filter())
12140         goto no_join_cache;
12141       if (cache_level == 3)
12142         prev_cache= 0;
12143       if ((tab->cache= new (root) JOIN_CACHE_BNLH(join, tab, prev_cache)))
12144       {
12145         tab->icp_other_tables_ok= FALSE;
12146         return (4 - MY_TEST(!prev_cache));
12147       }
12148       goto no_join_cache;
12149     }
12150     if (cache_level > 4 && no_bka_cache)
12151       goto no_join_cache;
12152 
12153     if ((flags & HA_MRR_NO_ASSOCIATION) &&
12154 	(cache_level <= 6 || no_hashed_cache))
12155       goto no_join_cache;
12156 
12157     if ((rows != HA_POS_ERROR) && !(flags & HA_MRR_USE_DEFAULT_IMPL))
12158     {
12159       if (cache_level <= 6 || no_hashed_cache)
12160       {
12161         if (cache_level == 5)
12162           prev_cache= 0;
12163         if ((tab->cache= new (root) JOIN_CACHE_BKA(join, tab, flags, prev_cache)))
12164           return (6 - MY_TEST(!prev_cache));
12165         goto no_join_cache;
12166       }
12167       else
12168       {
12169         if (cache_level == 7)
12170           prev_cache= 0;
12171         if ((tab->cache= new (root) JOIN_CACHE_BKAH(join, tab, flags, prev_cache)))
12172 	{
12173           tab->idx_cond_fact_out= FALSE;
12174           return (8 - MY_TEST(!prev_cache));
12175         }
12176         goto no_join_cache;
12177       }
12178     }
12179     goto no_join_cache;
12180   default : ;
12181   }
12182 
12183 no_join_cache:
12184   if (tab->type != JT_ALL && tab->is_ref_for_hash_join())
12185   {
12186     tab->type= JT_ALL;
12187     tab->ref.key_parts= 0;
12188   }
12189   revise_cache_usage(tab);
12190   return 0;
12191 }
12192 
12193 
12194 /*
12195   Check whether join buffers can be used to join tables of a join
12196 
12197   SYNOPSIS
    check_join_cache_usage_for_tables()
12199       join                join whose tables are to be checked
12200       options             options of the join
12201       no_jbuf_after       don't use join buffering after table with this number
12202                           (The tables are assumed to be numbered in
12203                           first_linear_tab(join, WITHOUT_CONST_TABLES),
12204                           next_linear_tab(join, WITH_CONST_TABLES) order).
12205 
12206   DESCRIPTION
12207     For each table after the first non-constant table the function checks
12208     whether the table can be joined using a join buffer. If the function decides
12209     that a join buffer can be employed then it selects the most appropriate join
12210     cache object that contains this join buffer whose level is not greater
12211     than join_cache_level set for the join. To make this check the function
12212     calls the function check_join_cache_usage for every non-constant table.
12213 
12214   NOTES
12215     In some situations (e.g. for nested outer joins, for nested semi-joins) only
12216     incremental buffers can be used. If it turns out that for some inner table
12217     no join buffer can be used then any inner table of an outer/semi-join nest
12218     cannot use join buffer. In the case when already chosen buffer must be
12219     denied for a table the function recalls check_join_cache_usage()
12220     starting from this table. The pointer to the table from which the check
    has to be restarted is returned in join->return_tab (see the description
12222     of check_join_cache_usage).
12223 */
12224 
check_join_cache_usage_for_tables(JOIN * join,ulonglong options,uint no_jbuf_after)12225 void check_join_cache_usage_for_tables(JOIN *join, ulonglong options,
12226                                        uint no_jbuf_after)
12227 {
12228   JOIN_TAB *tab;
12229   JOIN_TAB *prev_tab;
12230 
12231   for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
12232        tab;
12233        tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
12234   {
12235     tab->used_join_cache_level= join->max_allowed_join_cache_level;
12236   }
12237 
12238   uint idx= join->const_tables;
12239   for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
12240        tab;
12241        tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
12242   {
12243 restart:
12244     tab->icp_other_tables_ok= TRUE;
12245     tab->idx_cond_fact_out= TRUE;
12246 
12247     /*
12248       Check if we have a preceding join_tab, as something that will feed us
12249       records that we could buffer. We don't have it, if
12250        - this is the first non-const table in the join order,
12251        - this is the first table inside an SJM nest.
12252     */
12253     prev_tab= tab - 1;
12254     if (tab == join->join_tab + join->const_tables ||
12255         (tab->bush_root_tab && tab->bush_root_tab->bush_children->start == tab))
12256       prev_tab= NULL;
12257 
12258     switch (tab->type) {
12259     case JT_SYSTEM:
12260     case JT_CONST:
12261     case JT_EQ_REF:
12262     case JT_REF:
12263     case JT_REF_OR_NULL:
12264     case JT_ALL:
12265       tab->used_join_cache_level= check_join_cache_usage(tab, options,
12266                                                          no_jbuf_after,
12267                                                          idx,
12268                                                          prev_tab);
12269       tab->use_join_cache= MY_TEST(tab->used_join_cache_level);
12270       /*
12271         psergey-merge: todo: raise the question that this is really stupid that
12272         we can first allocate a join buffer, then decide not to use it and free
12273         it.
12274       */
12275       if (join->return_tab)
12276       {
12277         tab= join->return_tab;
12278         goto restart;
12279       }
12280       break;
12281     default:
12282       tab->used_join_cache_level= 0;
12283     }
12284     if (!tab->bush_children)
12285       idx++;
12286   }
12287 }
12288 
12289 /**
12290   Remove pushdown conditions that are already checked by the scan phase
12291   of BNL/BNLH joins.
12292 
12293   @note
12294   If the single-table condition for this table will be used by a
12295   blocked join to pre-filter this table's rows, there is no need
12296   to re-check the same single-table condition for each joined record.
12297 
12298   This method removes from JOIN_TAB::select_cond and JOIN_TAB::select::cond
  all top-level conjuncts that also appear in JOIN_TAB::cache_select::cond.
12300 */
12301 
remove_redundant_bnl_scan_conds()12302 void JOIN_TAB::remove_redundant_bnl_scan_conds()
12303 {
12304   if (!(select_cond && cache_select && cache &&
12305         (cache->get_join_alg() == JOIN_CACHE::BNL_JOIN_ALG ||
12306          cache->get_join_alg() == JOIN_CACHE::BNLH_JOIN_ALG)))
12307     return;
12308 
12309   /*
12310     select->cond is not processed separately. This method assumes it is always
12311     the same as select_cond.
12312   */
12313   if (select && select->cond != select_cond)
12314     return;
12315 
12316   if (is_cond_and(select_cond))
12317   {
12318     List_iterator<Item> pushed_cond_li(*((Item_cond*) select_cond)->argument_list());
12319     Item *pushed_item;
12320     Item_cond_and *reduced_select_cond= new (join->thd->mem_root)
12321       Item_cond_and(join->thd);
12322 
12323     if (is_cond_and(cache_select->cond))
12324     {
12325       List_iterator<Item> scan_cond_li(*((Item_cond*) cache_select->cond)->argument_list());
12326       Item *scan_item;
12327       while ((pushed_item= pushed_cond_li++))
12328       {
12329         bool found_cond= false;
12330         scan_cond_li.rewind();
12331         while ((scan_item= scan_cond_li++))
12332         {
12333           if (pushed_item->eq(scan_item, 0))
12334           {
12335             found_cond= true;
12336             break;
12337           }
12338         }
12339         if (!found_cond)
12340           reduced_select_cond->add(pushed_item, join->thd->mem_root);
12341       }
12342     }
12343     else
12344     {
12345       while ((pushed_item= pushed_cond_li++))
12346       {
12347         if (!pushed_item->eq(cache_select->cond, 0))
12348           reduced_select_cond->add(pushed_item, join->thd->mem_root);
12349       }
12350     }
12351 
12352     /*
12353       JOIN_CACHE::check_match uses JOIN_TAB::select->cond instead of
12354       JOIN_TAB::select_cond. set_cond() sets both pointers.
12355     */
12356     if (reduced_select_cond->argument_list()->is_empty())
12357       set_cond(NULL);
12358     else if (reduced_select_cond->argument_list()->elements == 1)
12359       set_cond(reduced_select_cond->argument_list()->head());
12360     else
12361     {
12362       reduced_select_cond->quick_fix_field();
12363       set_cond(reduced_select_cond);
12364     }
12365   }
12366   else if (select_cond->eq(cache_select->cond, 0))
12367     set_cond(NULL);
12368 }
12369 
12370 
12371 /*
12372   Plan refinement stage: do various setup things for the executor
12373 
12374   SYNOPSIS
12375     make_join_readinfo()
12376       join           Join being processed
12377       options        Join's options (checking for SELECT_DESCRIBE,
12378                      SELECT_NO_JOIN_CACHE)
12379       no_jbuf_after  Don't use join buffering after table with this number.
12380 
12381   DESCRIPTION
12382     Plan refinement stage: do various set ups for the executioner
12383       - set up use of join buffering
12384       - push index conditions
12385       - increment relevant counters
12386       - etc
12387 
12388   RETURN
12389     FALSE - OK
12390     TRUE  - Out of memory
12391 */
12392 
12393 static bool
make_join_readinfo(JOIN * join,ulonglong options,uint no_jbuf_after)12394 make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after)
12395 {
12396   JOIN_TAB *tab;
12397   uint i;
12398   DBUG_ENTER("make_join_readinfo");
12399 
12400   bool statistics= MY_TEST(!(join->select_options & SELECT_DESCRIBE));
12401   bool sorted= 1;
12402 
12403   join->complex_firstmatch_tables= table_map(0);
12404 
12405   if (!join->select_lex->sj_nests.is_empty() &&
12406       setup_semijoin_dups_elimination(join, options, no_jbuf_after))
12407     DBUG_RETURN(TRUE); /* purecov: inspected */
12408 
12409   /* For const tables, set partial_join_cardinality to 1. */
12410   for (tab= join->join_tab; tab != join->join_tab + join->const_tables; tab++)
12411     tab->partial_join_cardinality= 1;
12412 
12413   JOIN_TAB *prev_tab= NULL;
12414   i= join->const_tables;
12415   for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
12416        tab;
12417        prev_tab=tab, tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
12418   {
12419     /*
12420       The approximation below for partial join cardinality is not good because
12421         - it does not take into account some pushdown predicates
12422         - it does not differentiate between inner joins, outer joins and
12423         semi-joins.
12424       Later it should be improved.
12425     */
12426 
12427     if (tab->bush_root_tab && tab->bush_root_tab->bush_children->start == tab)
12428       prev_tab= NULL;
12429     DBUG_ASSERT(tab->bush_children || tab->table == join->best_positions[i].table->table);
12430 
12431     tab->partial_join_cardinality= join->best_positions[i].records_read *
12432                                    (prev_tab? prev_tab->partial_join_cardinality : 1);
12433     if (!tab->bush_children)
12434       i++;
12435   }
12436 
12437   check_join_cache_usage_for_tables(join, options, no_jbuf_after);
12438 
12439   JOIN_TAB *first_tab;
12440   for (tab= first_tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
12441        tab;
12442        tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
12443   {
12444     if (tab->bush_children)
12445     {
12446       if (setup_sj_materialization_part2(tab))
12447         return TRUE;
12448     }
12449 
12450     TABLE *table=tab->table;
12451     uint jcl= tab->used_join_cache_level;
12452     tab->read_record.table= table;
12453     tab->read_record.unlock_row= rr_unlock_row;
12454     tab->sorted= sorted;
12455     sorted= 0;                                  // only first must be sorted
12456 
12457 
12458     /*
12459       We should not set tab->next_select for the last table in the
12460       SMJ-nest, as setup_sj_materialization() has already set it to
12461       end_sj_materialize.
12462     */
12463     if (!(tab->bush_root_tab &&
12464           tab->bush_root_tab->bush_children->end == tab + 1))
12465     {
12466       tab->next_select=sub_select;		/* normal select */
12467     }
12468 
12469 
12470     if (tab->loosescan_match_tab)
12471     {
12472       if (!(tab->loosescan_buf= (uchar*)join->thd->alloc(tab->
12473                                                          loosescan_key_len)))
12474         return TRUE; /* purecov: inspected */
12475       tab->sorted= TRUE;
12476     }
12477     table->status=STATUS_NO_RECORD;
12478     pick_table_access_method (tab);
12479 
12480     if (jcl)
12481        tab[-1].next_select=sub_select_cache;
12482 
12483     if (tab->cache && tab->cache->get_join_alg() == JOIN_CACHE::BNLH_JOIN_ALG)
12484       tab->type= JT_HASH;
12485 
12486     switch (tab->type) {
12487     case JT_SYSTEM:				// Only happens with left join
12488     case JT_CONST:				// Only happens with left join
12489       /* Only happens with outer joins */
12490       tab->read_first_record= tab->type == JT_SYSTEM ? join_read_system
12491                                                      : join_read_const;
12492       if (table->covering_keys.is_set(tab->ref.key) && !table->no_keyread)
12493         table->file->ha_start_keyread(tab->ref.key);
12494       else if ((!jcl || jcl > 4) && !tab->ref.is_access_triggered())
12495         push_index_cond(tab, tab->ref.key);
12496       break;
12497     case JT_EQ_REF:
12498       tab->read_record.unlock_row= join_read_key_unlock_row;
12499       /* fall through */
12500       if (table->covering_keys.is_set(tab->ref.key) && !table->no_keyread)
12501         table->file->ha_start_keyread(tab->ref.key);
12502       else if ((!jcl || jcl > 4) && !tab->ref.is_access_triggered())
12503         push_index_cond(tab, tab->ref.key);
12504       break;
12505     case JT_REF_OR_NULL:
12506     case JT_REF:
12507       if (tab->select)
12508       {
12509 	delete tab->select->quick;
12510 	tab->select->quick=0;
12511       }
12512       delete tab->quick;
12513       tab->quick=0;
12514       if (table->covering_keys.is_set(tab->ref.key) && !table->no_keyread)
12515         table->file->ha_start_keyread(tab->ref.key);
12516       else if ((!jcl || jcl > 4) && !tab->ref.is_access_triggered())
12517         push_index_cond(tab, tab->ref.key);
12518       break;
12519     case JT_ALL:
12520     case JT_HASH:
12521       /*
12522 	If previous table use cache
12523         If the incoming data set is already sorted don't use cache.
12524         Also don't use cache if this is the first table in semi-join
12525           materialization nest.
12526       */
12527       /* These init changes read_record */
12528       if (tab->use_quick == 2)
12529       {
12530         join->thd->set_status_no_good_index_used();
12531 	tab->read_first_record= join_init_quick_read_record;
12532 	if (statistics)
12533 	  join->thd->inc_status_select_range_check();
12534       }
12535       else
12536       {
12537         if (!tab->bush_children)
12538           tab->read_first_record= join_init_read_record;
12539 	if (tab == first_tab)
12540 	{
12541 	  if (tab->select && tab->select->quick)
12542 	  {
12543 	    if (statistics)
12544 	      join->thd->inc_status_select_range();
12545 	  }
12546 	  else
12547 	  {
12548             join->thd->set_status_no_index_used();
12549 	    if (statistics)
12550 	    {
12551               join->thd->inc_status_select_scan();
12552 	      join->thd->query_plan_flags|= QPLAN_FULL_SCAN;
12553 	    }
12554 	  }
12555 	}
12556 	else
12557 	{
12558 	  if (tab->select && tab->select->quick)
12559 	  {
12560 	    if (statistics)
12561               join->thd->inc_status_select_full_range_join();
12562 	  }
12563 	  else
12564 	  {
12565             join->thd->set_status_no_index_used();
12566 	    if (statistics)
12567 	    {
12568               join->thd->inc_status_select_full_join();
12569 	      join->thd->query_plan_flags|= QPLAN_FULL_JOIN;
12570 	    }
12571 	  }
12572 	}
12573 	if (!table->no_keyread)
12574 	{
12575 	  if (tab->select && tab->select->quick &&
12576               tab->select->quick->index != MAX_KEY && //not index_merge
12577 	      table->covering_keys.is_set(tab->select->quick->index))
12578             table->file->ha_start_keyread(tab->select->quick->index);
12579 	  else if (!table->covering_keys.is_clear_all() &&
12580 		   !(tab->select && tab->select->quick))
12581 	  {					// Only read index tree
12582             if (tab->loosescan_match_tab)
12583               tab->index= tab->loosescan_key;
12584             else
12585             {
12586 #ifdef BAD_OPTIMIZATION
12587               /*
12588                 It has turned out that the below change, while speeding things
12589                 up for disk-bound loads, slows them down for cases when the data
12590                 is in disk cache (see BUG#35850):
12591                 See bug #26447: "Using the clustered index for a table scan
12592                 is always faster than using a secondary index".
12593               */
12594               if (table->s->primary_key != MAX_KEY &&
12595                   table->file->primary_key_is_clustered())
12596                 tab->index= table->s->primary_key;
12597               else
12598 #endif
12599                 tab->index=find_shortest_key(table, & table->covering_keys);
12600             }
12601 	    tab->read_first_record= join_read_first;
12602             /* Read with index_first / index_next */
12603 	    tab->type= tab->type == JT_ALL ? JT_NEXT : JT_HASH_NEXT;
12604 	  }
12605 	}
12606         if (tab->select && tab->select->quick &&
12607             tab->select->quick->index != MAX_KEY &&
12608             !tab->table->file->keyread_enabled())
12609           push_index_cond(tab, tab->select->quick->index);
12610       }
12611       break;
12612     case JT_FT:
12613       break;
12614       /* purecov: begin deadcode */
12615     default:
12616       DBUG_PRINT("error",("Table type %d found",tab->type));
12617       break;
12618     case JT_UNKNOWN:
12619     case JT_MAYBE_REF:
12620       abort();
12621       /* purecov: end */
12622     }
12623 
12624     DBUG_EXECUTE("where",
12625                  char buff[256];
12626                  String str(buff,sizeof(buff),system_charset_info);
12627                  str.length(0);
12628                  str.append(tab->table? tab->table->alias.c_ptr() :"<no_table_name>");
12629                  str.append(" final_pushdown_cond");
12630                  print_where(tab->select_cond, str.c_ptr_safe(), QT_ORDINARY););
12631   }
12632   uint n_top_tables= (uint)(join->join_tab_ranges.head()->end -
12633                      join->join_tab_ranges.head()->start);
12634 
12635   join->join_tab[n_top_tables - 1].next_select=0;  /* Set by do_select */
12636 
12637   /*
12638     If a join buffer is used to join a table the ordering by an index
12639     for the first non-constant table cannot be employed anymore.
12640   */
12641   for (tab= join->join_tab + join->const_tables ;
12642        tab != join->join_tab + n_top_tables ; tab++)
12643   {
12644     if (tab->use_join_cache)
12645     {
12646        JOIN_TAB *sort_by_tab= join->group && join->simple_group &&
12647                               join->group_list ?
12648 			       join->join_tab+join->const_tables :
12649                                join->get_sort_by_join_tab();
12650       /*
12651         It could be that sort_by_tab==NULL, and the plan is to use filesort()
12652         on the first table.
12653       */
12654       if (join->order)
12655       {
12656         join->simple_order= 0;
12657         join->need_tmp= 1;
12658       }
12659 
12660       if (join->group && !join->group_optimized_away)
12661       {
12662         join->need_tmp= 1;
12663         join->simple_group= 0;
12664       }
12665 
12666       if (sort_by_tab)
12667       {
12668         join->need_tmp= 1;
12669         join->simple_order= join->simple_group= 0;
12670         if (sort_by_tab->type == JT_NEXT &&
12671             !sort_by_tab->table->covering_keys.is_set(sort_by_tab->index))
12672         {
12673           sort_by_tab->type= JT_ALL;
12674           sort_by_tab->read_first_record= join_init_read_record;
12675         }
12676         else if (sort_by_tab->type == JT_HASH_NEXT &&
12677                  !sort_by_tab->table->covering_keys.is_set(sort_by_tab->index))
12678         {
12679           sort_by_tab->type= JT_HASH;
12680           sort_by_tab->read_first_record= join_init_read_record;
12681         }
12682       }
12683       break;
12684     }
12685   }
12686 
12687   DBUG_RETURN(FALSE);
12688 }
12689 
12690 
12691 /**
  Give error if some tables are done with a full join.
12693 
12694   This is used by multi_table_update and multi_table_delete when running
12695   in safe mode.
12696 
12697   @param join		Join condition
12698 
12699   @retval
12700     0	ok
12701   @retval
12702     1	Error (full join used)
12703 */
12704 
error_if_full_join(JOIN * join)12705 bool error_if_full_join(JOIN *join)
12706 {
12707   for (JOIN_TAB *tab=first_top_level_tab(join, WITH_CONST_TABLES); tab;
12708        tab= next_top_level_tab(join, tab))
12709   {
12710     if (tab->type == JT_ALL && (!tab->select || !tab->select->quick))
12711     {
12712       my_message(ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE,
12713                  ER_THD(join->thd,
12714                         ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE), MYF(0));
12715       return(1);
12716     }
12717   }
12718   return(0);
12719 }
12720 
12721 
12722 /**
12723   cleanup JOIN_TAB.
12724 
12725   DESCRIPTION
12726     This is invoked when we've finished all join executions.
12727 */
12728 
void JOIN_TAB::cleanup()
{
  DBUG_ENTER("JOIN_TAB::cleanup");

  DBUG_PRINT("enter", ("tab: %p  table %s.%s",
                       this,
                       (table ? table->s->db.str : "?"),
                       (table ? table->s->table_name.str : "?")));
  /* Destroy the select/quick objects and clear the pointers */
  delete select;
  select= 0;
  delete quick;
  quick= 0;
  /* Release the join cache, if any, through its free() method */
  if (cache)
  {
    cache->free();
    cache= 0;
  }
  limit= 0;
  // Free select that was created for filesort outside of create_sort_index
  if (filesort && filesort->select && !filesort->own_select)
    delete filesort->select;
  delete filesort;
  filesort= NULL;
  /* Skip non-existing derived tables/views result tables */
  if (table &&
      (table->s->tmp_table != INTERNAL_TMP_TABLE || table->is_created()))
  {
    table->file->ha_end_keyread();
    table->file->ha_index_or_rnd_end();
  }
  /*
    NOTE(review): the block below repeats ha_end_keyread() and (for non-FT
    access) ha_index_or_rnd_end() without the "is created" guard used above.
    Presumably harmless if these calls are no-ops on an idle handler; confirm.
  */
  if (table)
  {
    table->file->ha_end_keyread();
    if (type == JT_FT)
      table->file->ha_ft_end();
    else
      table->file->ha_index_or_rnd_end();
    preread_init_done= FALSE;
    /* Tables attached to a jtbm_subselect get special treatment */
    if (table->pos_in_table_list &&
        table->pos_in_table_list->jtbm_subselect)
    {
      if (table->pos_in_table_list->jtbm_subselect->is_jtbm_const_tab)
      {
        /*
          Set this to NULL so that cleanup_empty_jtbm_semi_joins() doesn't
          attempt to make another free_tmp_table call.
        */
        table->pos_in_table_list->table= NULL;
        free_tmp_table(join->thd, table);
        table= NULL;
      }
      else
      {
        TABLE_LIST *tmp= table->pos_in_table_list;
        end_read_record(&read_record);
        tmp->jtbm_subselect->cleanup();
        /*
          The above call freed the materialized temptable. Set it to NULL so
          that we don't attempt to touch it if JOIN_TAB::cleanup() is invoked
          multiple times (it may be)
        */
        tmp->table= NULL;
        table= NULL;
      }
      /*
        Early exit: the statements after this block (resetting
        table->reginfo.join_tab, the final end_read_record() and clearing
        explain_plan) are not executed on this path.
      */
      DBUG_VOID_RETURN;
    }
    /*
      We need to reset this for next select
      (Tested in part_of_refkey)
    */
    table->reginfo.join_tab= 0;
  }
  end_read_record(&read_record);
  explain_plan= NULL;
  DBUG_VOID_RETURN;
}
12805 
12806 
12807 /**
12808   Estimate the time to get rows of the joined table
12809 */
12810 
scan_time()12811 double JOIN_TAB::scan_time()
12812 {
12813   double res;
12814   if (table->is_created())
12815   {
12816     if (table->is_filled_at_execution())
12817     {
12818       get_delayed_table_estimates(table, &records, &read_time,
12819                                     &startup_cost);
12820       found_records= records;
12821       table->quick_condition_rows= records;
12822     }
12823     else
12824     {
12825       found_records= records= table->stat_records();
12826       read_time= table->file->scan_time();
12827       /*
12828         table->quick_condition_rows has already been set to
12829         table->file->stats.records
12830       */
12831     }
12832     res= read_time;
12833   }
12834   else
12835   {
12836     found_records= records=table->stat_records();
12837     read_time= found_records ? (double)found_records: 10.0;// TODO:fix this stub
12838     res= read_time;
12839   }
12840   return res;
12841 }
12842 
12843 
12844 /**
  Estimate the number of rows that an access method will read from a table.
12846 
12847   @todo: why not use JOIN_TAB::found_records
12848 */
12849 
get_examined_rows()12850 ha_rows JOIN_TAB::get_examined_rows()
12851 {
12852   double examined_rows;
12853   SQL_SELECT *sel= filesort? filesort->select : this->select;
12854 
12855   if (sel && sel->quick && use_quick != 2)
12856     examined_rows= (double)sel->quick->records;
12857   else if (type == JT_NEXT || type == JT_ALL ||
12858            type == JT_HASH || type ==JT_HASH_NEXT)
12859   {
12860     if (limit)
12861     {
12862       /*
12863         @todo This estimate is wrong, a LIMIT query may examine much more rows
12864         than the LIMIT itself.
12865       */
12866       examined_rows= (double)limit;
12867     }
12868     else
12869     {
12870       if (table->is_filled_at_execution())
12871         examined_rows= (double)records;
12872       else
12873       {
12874         /*
12875           handler->info(HA_STATUS_VARIABLE) has been called in
12876           make_join_statistics()
12877         */
12878         examined_rows= (double)table->stat_records();
12879       }
12880     }
12881   }
12882   else
12883     examined_rows= records_read;
12884 
12885   if (examined_rows >= (double) HA_ROWS_MAX)
12886     return HA_ROWS_MAX;
12887   return (ha_rows) examined_rows;
12888 }
12889 
12890 
12891 /**
12892   Initialize the join_tab before reading.
12893   Currently only derived table/view materialization is done here.
12894 
12895   TODO: consider moving this together with join_tab_execution_startup
12896 */
preread_init()12897 bool JOIN_TAB::preread_init()
12898 {
12899   TABLE_LIST *derived= table->pos_in_table_list;
12900   DBUG_ENTER("JOIN_TAB::preread_init");
12901 
12902   if (!derived || !derived->is_materialized_derived())
12903   {
12904     preread_init_done= TRUE;
12905     DBUG_RETURN(FALSE);
12906   }
12907 
12908   /* Materialize derived table/view. */
12909   if ((!derived->get_unit()->executed  ||
12910        derived->is_recursive_with_table() ||
12911        derived->get_unit()->uncacheable) &&
12912       mysql_handle_single_derived(join->thd->lex,
12913                                     derived, DT_CREATE | DT_FILL))
12914     DBUG_RETURN(TRUE);
12915 
12916   if (!(derived->get_unit()->uncacheable & UNCACHEABLE_DEPENDENT) ||
12917       derived->is_nonrecursive_derived_with_rec_ref())
12918     preread_init_done= TRUE;
12919   if (select && select->quick)
12920     select->quick->replace_handler(table->file);
12921 
12922   DBUG_EXECUTE_IF("show_explain_probe_join_tab_preread",
12923                   if (dbug_user_var_equals_int(join->thd,
12924                                                "show_explain_probe_select_id",
12925                                                join->select_lex->select_number))
12926                         dbug_serve_apcs(join->thd, 1);
12927                  );
12928 
12929   /* init ftfuns for just initialized derived table */
12930   if (table->fulltext_searched)
12931     if (init_ftfuncs(join->thd, join->select_lex, MY_TEST(join->order)))
12932       DBUG_RETURN(TRUE);
12933 
12934   DBUG_RETURN(FALSE);
12935 }
12936 
12937 
12938 /**
12939   Build a TABLE_REF structure for index lookup in the temporary table
12940 
12941   @param thd             Thread handle
12942   @param tmp_key         The temporary table key
12943   @param it              The iterator of items for lookup in the key
12944   @param skip            Number of fields from the beginning to skip
12945 
12946   @details
12947   Build TABLE_REF object for lookup in the key 'tmp_key' using items
12948   accessible via item iterator 'it'.
12949 
12950   @retval TRUE  Error
12951   @retval FALSE OK
12952 */
12953 
tmp_table_index_lookup_init(THD * thd,KEY * tmp_key,Item_iterator & it,bool value,uint skip)12954 bool TABLE_REF::tmp_table_index_lookup_init(THD *thd,
12955                                             KEY *tmp_key,
12956                                             Item_iterator &it,
12957                                             bool value,
12958                                             uint skip)
12959 {
12960   uint tmp_key_parts= tmp_key->user_defined_key_parts;
12961   uint i;
12962   DBUG_ENTER("TABLE_REF::tmp_table_index_lookup_init");
12963 
12964   key= 0; /* The only temp table index. */
12965   key_length= tmp_key->key_length;
12966   if (!(key_buff=
12967         (uchar*) thd->calloc(ALIGN_SIZE(tmp_key->key_length) * 2)) ||
12968       !(key_copy=
12969         (store_key**) thd->alloc((sizeof(store_key*) *
12970                                   (tmp_key_parts + 1)))) ||
12971       !(items=
12972         (Item**) thd->alloc(sizeof(Item*) * tmp_key_parts)))
12973     DBUG_RETURN(TRUE);
12974 
12975   key_buff2= key_buff + ALIGN_SIZE(tmp_key->key_length);
12976 
12977   KEY_PART_INFO *cur_key_part= tmp_key->key_part;
12978   store_key **ref_key= key_copy;
12979   uchar *cur_ref_buff= key_buff;
12980 
12981   it.open();
12982   for (i= 0; i < skip; i++) it.next();
12983   for (i= 0; i < tmp_key_parts; i++, cur_key_part++, ref_key++)
12984   {
12985     Item *item= it.next();
12986     DBUG_ASSERT(item);
12987     items[i]= item;
12988     int null_count= MY_TEST(cur_key_part->field->real_maybe_null());
12989     *ref_key= new store_key_item(thd, cur_key_part->field,
12990                                  /* TIMOUR:
12991                                     the NULL byte is taken into account in
12992                                     cur_key_part->store_length, so instead of
12993                                     cur_ref_buff + MY_TEST(maybe_null), we could
12994                                     use that information instead.
12995                                  */
12996                                  cur_ref_buff + null_count,
12997                                  null_count ? cur_ref_buff : 0,
12998                                  cur_key_part->length, items[i], value);
12999     cur_ref_buff+= cur_key_part->store_length;
13000   }
13001   *ref_key= NULL; /* End marker. */
13002   key_err= 1;
13003   key_parts= tmp_key_parts;
13004   DBUG_RETURN(FALSE);
13005 }
13006 
13007 
13008 /*
13009   Check if ref access uses "Full scan on NULL key" (i.e. it actually alternates
13010   between ref access and full table scan)
13011 */
13012 
is_access_triggered()13013 bool TABLE_REF::is_access_triggered()
13014 {
13015   for (uint i = 0; i < key_parts; i++)
13016   {
13017     if (cond_guards[i])
13018       return TRUE;
13019   }
13020   return FALSE;
13021 }
13022 
13023 
13024 /**
13025   Partially cleanup JOIN after it has executed: close index or rnd read
13026   (table cursors), free quick selects.
13027 
13028     This function is called in the end of execution of a JOIN, before the used
13029     tables are unlocked and closed.
13030 
13031     For a join that is resolved using a temporary table, the first sweep is
13032     performed against actual tables and an intermediate result is inserted
13033     into the temprorary table.
13034     The last sweep is performed against the temporary table. Therefore,
13035     the base tables and associated buffers used to fill the temporary table
13036     are no longer needed, and this function is called to free them.
13037 
13038     For a join that is performed without a temporary table, this function
13039     is called after all rows are sent, but before EOF packet is sent.
13040 
13041     For a simple SELECT with no subqueries this function performs a full
13042     cleanup of the JOIN and calls mysql_unlock_read_tables to free used base
13043     tables.
13044 
13045     If a JOIN is executed for a subquery or if it has a subquery, we can't
13046     do the full cleanup and need to do a partial cleanup only.
13047     - If a JOIN is not the top level join, we must not unlock the tables
13048     because the outer select may not have been evaluated yet, and we
13049     can't unlock only selected tables of a query.
13050     - Additionally, if this JOIN corresponds to a correlated subquery, we
13051     should not free quick selects and join buffers because they will be
13052     needed for the next execution of the correlated subquery.
13053     - However, if this is a JOIN for a [sub]select, which is not
13054     a correlated subquery itself, but has subqueries, we can free it
13055     fully and also free JOINs of all its subqueries. The exception
13056     is a subquery in SELECT list, e.g: @n
13057     SELECT a, (select MY_MAX(b) from t1) group by c @n
13058     This subquery will not be evaluated at first sweep and its value will
13059     not be inserted into the temporary table. Instead, it's evaluated
13060     when selecting from the temporary table. Therefore, it can't be freed
13061     here even though it's not correlated.
13062 
13063   @todo
13064     Unlock tables even if the join isn't top level select in the tree
13065 */
13066 
join_free()13067 void JOIN::join_free()
13068 {
13069   SELECT_LEX_UNIT *tmp_unit;
13070   SELECT_LEX *sl;
13071   /*
13072     Optimization: if not EXPLAIN and we are done with the JOIN,
13073     free all tables.
13074   */
13075   bool full= !(select_lex->uncacheable) &&  !(thd->lex->describe);
13076   bool can_unlock= full;
13077   DBUG_ENTER("JOIN::join_free");
13078 
13079   cleanup(full);
13080 
13081   for (tmp_unit= select_lex->first_inner_unit();
13082        tmp_unit;
13083        tmp_unit= tmp_unit->next_unit())
13084   {
13085     if (tmp_unit->with_element && tmp_unit->with_element->is_recursive)
13086       continue;
13087     for (sl= tmp_unit->first_select(); sl; sl= sl->next_select())
13088     {
13089       Item_subselect *subselect= sl->master_unit()->item;
13090       bool full_local= full && (!subselect || subselect->is_evaluated());
13091       /*
13092         If this join is evaluated, we can fully clean it up and clean up all
13093         its underlying joins even if they are correlated -- they will not be
13094         used any more anyway.
13095         If this join is not yet evaluated, we still must clean it up to
13096         close its table cursors -- it may never get evaluated, as in case of
13097         ... HAVING FALSE OR a IN (SELECT ...))
13098         but all table cursors must be closed before the unlock.
13099       */
13100       sl->cleanup_all_joins(full_local);
13101       /* Can't unlock if at least one JOIN is still needed */
13102       can_unlock= can_unlock && full_local;
13103     }
13104   }
13105   /*
13106     We are not using tables anymore
13107     Unlock all tables. We may be in an INSERT .... SELECT statement.
13108   */
13109   if (can_unlock && lock && thd->lock && ! thd->locked_tables_mode &&
13110       !(select_options & SELECT_NO_UNLOCK) &&
13111       !select_lex->subquery_in_having &&
13112       (select_lex == (thd->lex->unit.fake_select_lex ?
13113                       thd->lex->unit.fake_select_lex : &thd->lex->select_lex)))
13114   {
13115     /*
13116       TODO: unlock tables even if the join isn't top level select in the
13117       tree.
13118     */
13119     mysql_unlock_read_tables(thd, lock);           // Don't free join->lock
13120     lock= 0;
13121   }
13122 
13123   DBUG_VOID_RETURN;
13124 }
13125 
13126 
13127 /**
13128   Free resources of given join.
13129 
13130   @param full   true if we should free all resources, call with full==1
13131                 should be last, before it this function can be called with
13132                 full==0
13133 
13134   @note
13135     With subquery this function definitely will be called several times,
13136     but even for simple query it can be called several times.
13137 */
13138 
cleanup(bool full)13139 void JOIN::cleanup(bool full)
13140 {
13141   DBUG_ENTER("JOIN::cleanup");
13142   DBUG_PRINT("enter", ("full %u", (uint) full));
13143 
13144   if (full)
13145     have_query_plan= QEP_DELETED;
13146 
13147   if (original_join_tab)
13148   {
13149     /* Free the original optimized join created for the group_by_handler */
13150     join_tab= original_join_tab;
13151     original_join_tab= 0;
13152     table_count= original_table_count;
13153   }
13154 
13155   if (join_tab)
13156   {
13157     JOIN_TAB *tab;
13158 
13159     if (full)
13160     {
13161       /*
13162         Call cleanup() on join tabs used by the join optimization
13163         (join->join_tab may now be pointing to result of make_simple_join
13164          reading from the temporary table)
13165 
13166         We also need to check table_count to handle various degenerate joins
13167         w/o tables: they don't have some members initialized and
13168         WALK_OPTIMIZATION_TABS may not work correctly for them.
13169       */
13170       if (top_join_tab_count && tables_list)
13171       {
13172         for (tab= first_breadth_first_tab(); tab;
13173              tab= next_breadth_first_tab(first_breadth_first_tab(),
13174                                          top_join_tab_count, tab))
13175         {
13176           tab->cleanup();
13177           delete tab->filesort_result;
13178           tab->filesort_result= NULL;
13179         }
13180       }
13181       cleaned= true;
13182       //psergey2: added (Q: why not in the above loop?)
13183       {
13184         JOIN_TAB *curr_tab= join_tab + exec_join_tab_cnt();
13185         for (uint i= 0; i < aggr_tables; i++, curr_tab++)
13186         {
13187           if (curr_tab->aggr)
13188           {
13189             free_tmp_table(thd, curr_tab->table);
13190             delete curr_tab->tmp_table_param;
13191             curr_tab->tmp_table_param= NULL;
13192             curr_tab->aggr= NULL;
13193 
13194             delete curr_tab->filesort_result;
13195             curr_tab->filesort_result= NULL;
13196           }
13197         }
13198         aggr_tables= 0; // psergey3
13199       }
13200     }
13201     else
13202     {
13203       for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITH_CONST_TABLES); tab;
13204            tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
13205       {
13206         tab->partial_cleanup();
13207       }
13208     }
13209   }
13210   if (full)
13211   {
13212     cleanup_empty_jtbm_semi_joins(this, join_list);
13213 
13214     // Run Cached_item DTORs!
13215     group_fields.delete_elements();
13216 
13217     /*
13218       We can't call delete_elements() on copy_funcs as this will cause
13219       problems in free_elements() as some of the elements are then deleted.
13220     */
13221     tmp_table_param.copy_funcs.empty();
13222     /*
13223       If we have tmp_join and 'this' JOIN is not tmp_join and
13224       tmp_table_param.copy_field's  of them are equal then we have to remove
13225       pointer to  tmp_table_param.copy_field from tmp_join, because it will
13226       be removed in tmp_table_param.cleanup().
13227     */
13228     tmp_table_param.cleanup();
13229 
13230     delete pushdown_query;
13231     pushdown_query= 0;
13232 
13233     if (!join_tab)
13234     {
13235       List_iterator<TABLE_LIST> li(*join_list);
13236       TABLE_LIST *table_ref;
13237       while ((table_ref= li++))
13238       {
13239         if (table_ref->table &&
13240             table_ref->jtbm_subselect &&
13241             table_ref->jtbm_subselect->is_jtbm_const_tab)
13242         {
13243           free_tmp_table(thd, table_ref->table);
13244           table_ref->table= NULL;
13245         }
13246       }
13247     }
13248   }
13249   /* Restore ref array to original state */
13250   if (current_ref_ptrs != items0)
13251   {
13252     set_items_ref_array(items0);
13253     set_group_rpa= false;
13254   }
13255   DBUG_VOID_RETURN;
13256 }
13257 
13258 
13259 /**
13260   Remove the following expressions from ORDER BY and GROUP BY:
13261   Constant expressions @n
13262   Expression that only uses tables that are of type EQ_REF and the reference
13263   is in the ORDER list or if all refereed tables are of the above type.
13264 
13265   In the following, the X field can be removed:
13266   @code
13267   SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t1.a,t2.X
13268   SELECT * FROM t1,t2,t3 WHERE t1.a=t2.a AND t2.b=t3.b ORDER BY t1.a,t3.X
13269   @endcode
13270 
13271   These can't be optimized:
13272   @code
13273   SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t2.X,t1.a
13274   SELECT * FROM t1,t2 WHERE t1.a=t2.a AND t1.b=t2.b ORDER BY t1.a,t2.c
13275   SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t2.b,t1.a
13276   @endcode
13277 
13278   TODO: this function checks ORDER::used, which can only have a value of 0.
13279 */
13280 
13281 static bool
eq_ref_table(JOIN * join,ORDER * start_order,JOIN_TAB * tab)13282 eq_ref_table(JOIN *join, ORDER *start_order, JOIN_TAB *tab)
13283 {
13284   if (tab->cached_eq_ref_table)			// If cached
13285     return tab->eq_ref_table;
13286   tab->cached_eq_ref_table=1;
13287   /* We can skip const tables only if not an outer table */
13288   if (tab->type == JT_CONST && !tab->first_inner)
13289     return (tab->eq_ref_table=1);		/* purecov: inspected */
13290   if (tab->type != JT_EQ_REF || tab->table->maybe_null)
13291     return (tab->eq_ref_table=0);		// We must use this
13292   Item **ref_item=tab->ref.items;
13293   Item **end=ref_item+tab->ref.key_parts;
13294   uint found=0;
13295   table_map map=tab->table->map;
13296 
13297   for (; ref_item != end ; ref_item++)
13298   {
13299     if (! (*ref_item)->const_item())
13300     {						// Not a const ref
13301       ORDER *order;
13302       for (order=start_order ; order ; order=order->next)
13303       {
13304 	if ((*ref_item)->eq(order->item[0],0))
13305 	  break;
13306       }
13307       if (order)
13308       {
13309         if (!(order->used & map))
13310         {
13311           found++;
13312           order->used|= map;
13313         }
13314 	continue;				// Used in ORDER BY
13315       }
13316       if (!only_eq_ref_tables(join,start_order, (*ref_item)->used_tables()))
13317 	return (tab->eq_ref_table=0);
13318     }
13319   }
13320   /* Check that there was no reference to table before sort order */
13321   for (; found && start_order ; start_order=start_order->next)
13322   {
13323     if (start_order->used & map)
13324     {
13325       found--;
13326       continue;
13327     }
13328     if (start_order->depend_map & map)
13329       return (tab->eq_ref_table=0);
13330   }
13331   return tab->eq_ref_table=1;
13332 }
13333 
13334 
13335 static bool
only_eq_ref_tables(JOIN * join,ORDER * order,table_map tables)13336 only_eq_ref_tables(JOIN *join,ORDER *order,table_map tables)
13337 {
13338   tables&= ~PSEUDO_TABLE_BITS;
13339   for (JOIN_TAB **tab=join->map2table ; tables ; tab++, tables>>=1)
13340   {
13341     if (tables & 1 && !eq_ref_table(join, order, *tab))
13342       return 0;
13343   }
13344   return 1;
13345 }
13346 
13347 
13348 /** Update the dependency map for the tables. */
13349 
update_depend_map(JOIN * join)13350 static void update_depend_map(JOIN *join)
13351 {
13352   JOIN_TAB *join_tab;
13353   for (join_tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITH_CONST_TABLES);
13354        join_tab;
13355        join_tab= next_linear_tab(join, join_tab, WITH_BUSH_ROOTS))
13356   {
13357     TABLE_REF *ref= &join_tab->ref;
13358     table_map depend_map=0;
13359     Item **item=ref->items;
13360     uint i;
13361     for (i=0 ; i < ref->key_parts ; i++,item++)
13362       depend_map|=(*item)->used_tables();
13363     depend_map&= ~OUTER_REF_TABLE_BIT;
13364     ref->depend_map= depend_map;
13365     for (JOIN_TAB **tab=join->map2table;
13366          depend_map ;
13367          tab++,depend_map>>=1 )
13368     {
13369       if (depend_map & 1)
13370         ref->depend_map|=(*tab)->ref.depend_map;
13371     }
13372   }
13373 }
13374 
13375 
13376 /** Update the dependency map for the sort order. */
13377 
update_depend_map_for_order(JOIN * join,ORDER * order)13378 static void update_depend_map_for_order(JOIN *join, ORDER *order)
13379 {
13380   for (; order ; order=order->next)
13381   {
13382     table_map depend_map;
13383     order->item[0]->update_used_tables();
13384     order->depend_map=depend_map=order->item[0]->used_tables();
13385     order->used= 0;
13386     // Not item_sum(), RAND() and no reference to table outside of sub select
13387     if (!(order->depend_map & (OUTER_REF_TABLE_BIT | RAND_TABLE_BIT))
13388         && !order->item[0]->with_sum_func &&
13389         join->join_tab)
13390     {
13391       for (JOIN_TAB **tab=join->map2table;
13392 	   depend_map ;
13393 	   tab++, depend_map>>=1)
13394       {
13395 	if (depend_map & 1)
13396 	  order->depend_map|=(*tab)->ref.depend_map;
13397       }
13398     }
13399   }
13400 }
13401 
13402 
13403 /**
13404   Remove all constants and check if ORDER only contains simple
13405   expressions.
13406 
13407   We also remove all duplicate expressions, keeping only the first one.
13408 
13409   simple_order is set to 1 if sort_order only uses fields from head table
13410   and the head table is not a LEFT JOIN table.
13411 
13412   @param join			Join handler
13413   @param first_order		List of SORT or GROUP order
13414   @param cond			WHERE statement
13415   @param change_list		Set to 1 if we should remove things from list.
13416                                 If this is not set, then only simple_order is
13417                                 calculated. This is not set when we
13418                                 are using ROLLUP
13419   @param simple_order		Set to 1 if we are only using simple
13420 				expressions.
13421 
13422   @return
13423     Returns new sort order
13424 */
13425 
13426 static ORDER *
remove_const(JOIN * join,ORDER * first_order,COND * cond,bool change_list,bool * simple_order)13427 remove_const(JOIN *join,ORDER *first_order, COND *cond,
13428              bool change_list, bool *simple_order)
13429 {
13430   *simple_order= join->rollup.state == ROLLUP::STATE_NONE;
13431   if (join->only_const_tables())
13432     return change_list ? 0 : first_order;		// No need to sort
13433 
13434   ORDER *order,**prev_ptr, *tmp_order;
13435   table_map UNINIT_VAR(first_table); /* protected by first_is_base_table */
13436   table_map not_const_tables= ~join->const_table_map;
13437   table_map ref;
13438   bool first_is_base_table= FALSE;
13439   DBUG_ENTER("remove_const");
13440 
13441   /*
13442     Join tab is set after make_join_statistics() has been called.
13443     In case of one table with GROUP BY this function is called before
13444     join_tab is set for the GROUP_BY expression
13445   */
13446   if (join->join_tab)
13447   {
13448     if (join->join_tab[join->const_tables].table)
13449     {
13450       first_table= join->join_tab[join->const_tables].table->map;
13451       first_is_base_table= TRUE;
13452     }
13453 
13454     /*
13455       Cleanup to avoid interference of calls of this function for
13456       ORDER BY and GROUP BY
13457     */
13458     for (JOIN_TAB *tab= join->join_tab + join->const_tables;
13459          tab < join->join_tab + join->table_count;
13460          tab++)
13461       tab->cached_eq_ref_table= FALSE;
13462 
13463     *simple_order= *join->join_tab[join->const_tables].on_expr_ref ? 0 : 1;
13464   }
13465   else
13466   {
13467     first_is_base_table= FALSE;
13468     first_table= 0;                     // Not used, for gcc
13469   }
13470 
13471   prev_ptr= &first_order;
13472 
13473   /* NOTE: A variable of not_const_tables ^ first_table; breaks gcc 2.7 */
13474 
13475   update_depend_map_for_order(join, first_order);
13476   for (order=first_order; order ; order=order->next)
13477   {
13478     table_map order_tables=order->item[0]->used_tables();
13479     if (order->item[0]->with_sum_func ||
13480         order->item[0]->with_window_func ||
13481         /*
13482           If the outer table of an outer join is const (either by itself or
13483           after applying WHERE condition), grouping on a field from such a
13484           table will be optimized away and filesort without temporary table
13485           will be used unless we prevent that now. Filesort is not fit to
13486           handle joins and the join condition is not applied. We can't detect
13487           the case without an expensive test, however, so we force temporary
13488           table for all queries containing more than one table, ROLLUP, and an
13489           outer join.
13490          */
13491         (join->table_count > 1 && join->rollup.state == ROLLUP::STATE_INITED &&
13492         join->outer_join))
13493       *simple_order=0;				// Must do a temp table to sort
13494     else if (!(order_tables & not_const_tables))
13495     {
13496       if (order->item[0]->with_subquery())
13497       {
13498         /*
13499           Delay the evaluation of constant ORDER and/or GROUP expressions that
13500           contain subqueries until the execution phase.
13501         */
13502         join->exec_const_order_group_cond.push_back(order->item[0],
13503                                                     join->thd->mem_root);
13504       }
13505       DBUG_PRINT("info",("removing: %s", order->item[0]->full_name()));
13506       continue;
13507     }
13508     else
13509     {
13510       if (order_tables & (RAND_TABLE_BIT | OUTER_REF_TABLE_BIT))
13511 	*simple_order=0;
13512       else
13513       {
13514 	if (cond && const_expression_in_where(cond,order->item[0]))
13515 	{
13516 	  DBUG_PRINT("info",("removing: %s", order->item[0]->full_name()));
13517 	  continue;
13518 	}
13519 	if (first_is_base_table &&
13520             (ref=order_tables & (not_const_tables ^ first_table)))
13521 	{
13522 	  if (!(order_tables & first_table) &&
13523               only_eq_ref_tables(join,first_order, ref))
13524 	  {
13525 	    DBUG_PRINT("info",("removing: %s", order->item[0]->full_name()));
13526 	    continue;
13527 	  }
13528           /*
13529             UseMultipleEqualitiesToRemoveTempTable:
13530             Can use multiple-equalities here to check that ORDER BY columns
13531             can be used without tmp. table.
13532           */
13533           bool can_subst_to_first_table= false;
13534           bool first_is_in_sjm_nest= false;
13535           if (first_is_base_table)
13536           {
13537             TABLE_LIST *tbl_for_first=
13538               join->join_tab[join->const_tables].table->pos_in_table_list;
13539             first_is_in_sjm_nest= tbl_for_first->sj_mat_info &&
13540                                   tbl_for_first->sj_mat_info->is_used;
13541           }
13542           /*
13543             Currently we do not employ the optimization that uses multiple
13544             equalities for ORDER BY to remove tmp table in the case when
13545             the first table happens to be the result of materialization of
13546             a semi-join nest ( <=> first_is_in_sjm_nest == true).
13547 
13548             When a semi-join nest is materialized and scanned to look for
13549             possible matches in the remaining tables for every its row
13550             the fields from the result of materialization are copied
13551             into the record buffers of tables from the semi-join nest.
13552             So these copies are used to access the remaining tables rather
13553             than the fields from the result of materialization.
13554 
13555             Unfortunately now this so-called 'copy back' technique is
13556             supported only if the rows  are scanned with the rr_sequential
13557             function, but not with other rr_* functions that are employed
13558             when the result of materialization is required to be sorted.
13559 
13560             TODO: either to support 'copy back' technique for the above case,
13561                   or to get rid of this technique altogether.
13562           */
13563           if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_ORDERBY_EQ_PROP) &&
13564               first_is_base_table && !first_is_in_sjm_nest &&
13565               order->item[0]->real_item()->type() == Item::FIELD_ITEM &&
13566               join->cond_equal)
13567           {
13568             table_map first_table_bit=
13569               join->join_tab[join->const_tables].table->map;
13570 
13571             Item *item= order->item[0];
13572 
13573             /*
13574               TODO: equality substitution in the context of ORDER BY is
13575               sometimes allowed when it is not allowed in the general case.
13576 
13577               We make the below call for its side effect: it will locate the
13578               multiple equality the item belongs to and set item->item_equal
13579               accordingly.
13580             */
13581             Item *res= item->propagate_equal_fields(join->thd,
13582                                                     Value_source::
13583                                                     Context_identity(),
13584                                                     join->cond_equal);
13585             Item_equal *item_eq;
13586             if ((item_eq= res->get_item_equal()))
13587             {
13588               Item *first= item_eq->get_first(NO_PARTICULAR_TAB, NULL);
13589               if (first->const_item() || first->used_tables() ==
13590                                          first_table_bit)
13591               {
13592                 can_subst_to_first_table= true;
13593               }
13594             }
13595           }
13596 
13597           if (!can_subst_to_first_table)
13598           {
13599             *simple_order=0;			// Must do a temp table to sort
13600           }
13601 	}
13602       }
13603     }
13604     /* Remove ORDER BY entries that we have seen before */
13605     for (tmp_order= first_order;
13606          tmp_order != order;
13607          tmp_order= tmp_order->next)
13608     {
13609       if (tmp_order->item[0]->eq(order->item[0],1))
13610         break;
13611     }
13612     if (tmp_order != order)
13613       continue;                                // Duplicate order by. Remove
13614 
13615     if (change_list)
13616       *prev_ptr= order;				// use this entry
13617     prev_ptr= &order->next;
13618   }
13619   if (change_list)
13620     *prev_ptr=0;
13621   if (prev_ptr == &first_order)			// Nothing to sort/group
13622     *simple_order=1;
13623 #ifndef DBUG_OFF
13624   if (unlikely(join->thd->is_error()))
13625     DBUG_PRINT("error",("Error from remove_const"));
13626 #endif
13627   DBUG_PRINT("exit",("simple_order: %d",(int) *simple_order));
13628   DBUG_RETURN(first_order);
13629 }
13630 
13631 
13632 /**
13633   Filter out ORDER items those are equal to constants in WHERE
13634 
13635   This function is a limited version of remove_const() for use
13636   with non-JOIN statements (i.e. single-table UPDATE and DELETE).
13637 
13638 
13639   @param order            Linked list of ORDER BY arguments
13640   @param cond             WHERE expression
13641 
13642   @return pointer to new filtered ORDER list or NULL if whole list eliminated
13643 
13644   @note
13645     This function overwrites input order list.
13646 */
13647 
simple_remove_const(ORDER * order,COND * where)13648 ORDER *simple_remove_const(ORDER *order, COND *where)
13649 {
13650   if (!order || !where)
13651     return order;
13652 
13653   ORDER *first= NULL, *prev= NULL;
13654   for (; order; order= order->next)
13655   {
13656     DBUG_ASSERT(!order->item[0]->with_sum_func); // should never happen
13657     if (!const_expression_in_where(where, order->item[0]))
13658     {
13659       if (!first)
13660         first= order;
13661       if (prev)
13662         prev->next= order;
13663       prev= order;
13664     }
13665   }
13666   if (prev)
13667     prev->next= NULL;
13668   return first;
13669 }
13670 
13671 
13672 static int
return_zero_rows(JOIN * join,select_result * result,List<TABLE_LIST> & tables,List<Item> & fields,bool send_row,ulonglong select_options,const char * info,Item * having,List<Item> & all_fields)13673 return_zero_rows(JOIN *join, select_result *result, List<TABLE_LIST> &tables,
13674 		 List<Item> &fields, bool send_row, ulonglong select_options,
13675 		 const char *info, Item *having, List<Item> &all_fields)
13676 {
13677   DBUG_ENTER("return_zero_rows");
13678 
13679   if (select_options & SELECT_DESCRIBE)
13680   {
13681     select_describe(join, FALSE, FALSE, FALSE, info);
13682     DBUG_RETURN(0);
13683   }
13684 
13685   if (send_row)
13686   {
13687     /*
13688       Set all tables to have NULL row. This is needed as we will be evaluating
13689       HAVING condition.
13690     */
13691     List_iterator<TABLE_LIST> ti(tables);
13692     TABLE_LIST *table;
13693     while ((table= ti++))
13694     {
13695       /*
13696         Don't touch semi-join materialization tables, as the above join_free()
13697         call has freed them (and HAVING clause can't have references to them
13698         anyway).
13699       */
13700       if (!table->is_jtbm())
13701         mark_as_null_row(table->table);		// All fields are NULL
13702     }
13703     List_iterator_fast<Item> it(all_fields);
13704     Item *item;
13705     /*
13706       Inform all items (especially aggregating) to calculate HAVING correctly,
13707       also we will need it for sending results.
13708     */
13709     while ((item= it++))
13710       item->no_rows_in_result();
13711     if (having && having->val_int() == 0)
13712       send_row=0;
13713   }
13714 
13715   /* Update results for FOUND_ROWS */
13716   if (!join->send_row_on_empty_set())
13717   {
13718     join->thd->set_examined_row_count(0);
13719     join->thd->limit_found_rows= 0;
13720   }
13721 
13722   if (!(result->send_result_set_metadata(fields,
13723                               Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)))
13724   {
13725     bool send_error= FALSE;
13726     if (send_row)
13727       send_error= result->send_data(fields) > 0;
13728     if (likely(!send_error))
13729       result->send_eof();				// Should be safe
13730   }
13731   /*
13732     JOIN::join_free() must be called after the virtual method
13733     select::send_result_set_metadata() returned control since
13734     implementation of this method could use data strutcures
13735     that are released by the method JOIN::join_free().
13736   */
13737   join->join_free();
13738 
13739   DBUG_RETURN(0);
13740 }
13741 
/**
  Used only in JOIN::clear (always) and in do_select()
  (if there were no matching rows)

  @param join            JOIN
  @param cleared_tables  If not null, clear also const tables and mark all
                         cleared tables in the map. cleared_tables is only
                         set when called from do_select() when there is a
                         group function and there were no matching rows.
*/
13752 
clear_tables(JOIN * join,table_map * cleared_tables)13753 static void clear_tables(JOIN *join, table_map *cleared_tables)
13754 {
13755   /*
13756     must clear only the non-const tables as const tables are not re-calculated.
13757   */
13758   for (uint i= 0 ; i < join->table_count ; i++)
13759   {
13760     TABLE *table= join->table[i];
13761 
13762     if (table->null_row)
13763       continue;                                 // Nothing more to do
13764     if (!(table->map & join->const_table_map) || cleared_tables)
13765     {
13766       if (cleared_tables)
13767       {
13768         (*cleared_tables)|= (((table_map) 1) << i);
13769         if (table->s->null_bytes)
13770         {
13771           /*
13772             Remember null bits for the record so that we can restore the
13773             original const record in unclear_tables()
13774           */
13775           memcpy(table->record[1], table->null_flags, table->s->null_bytes);
13776         }
13777       }
13778       mark_as_null_row(table);                  // All fields are NULL
13779     }
13780   }
13781 }
13782 
13783 
13784 /**
13785    Reverse null marking for tables and restore null bits.
13786 
13787    We have to do this because the tables may be re-used in a sub query
13788    and the subquery will assume that the const tables contains the original
13789    data before clear_tables().
13790 */
13791 
unclear_tables(JOIN * join,table_map * cleared_tables)13792 static void unclear_tables(JOIN *join, table_map *cleared_tables)
13793 {
13794   for (uint i= 0 ; i < join->table_count ; i++)
13795   {
13796     if ((*cleared_tables) & (((table_map) 1) << i))
13797     {
13798       TABLE *table= join->table[i];
13799       if (table->s->null_bytes)
13800         memcpy(table->null_flags, table->record[1], table->s->null_bytes);
13801       unmark_as_null_row(table);
13802     }
13803   }
13804 }
13805 
13806 
13807 /*****************************************************************************
  Make some simple condition optimizations:
13809   If there is a test 'field = const' change all refs to 'field' to 'const'
13810   Remove all dummy tests 'item = item', 'const op const'.
13811   Remove all 'item is NULL', when item can never be null!
13812   item->marker should be 0 for all items on entry
13813   Return in cond_value FALSE if condition is impossible (1 = 2)
13814 *****************************************************************************/
13815 
13816 class COND_CMP :public ilink {
13817 public:
operator new(size_t size,MEM_ROOT * mem_root)13818   static void *operator new(size_t size, MEM_ROOT *mem_root)
13819   {
13820     return alloc_root(mem_root, size);
13821   }
operator delete(void * ptr,size_t size)13822   static void operator delete(void *ptr __attribute__((unused)),
13823                               size_t size __attribute__((unused)))
13824   { TRASH_FREE(ptr, size); }
13825 
operator delete(void *,MEM_ROOT *)13826   static void operator delete(void *, MEM_ROOT*) {}
13827 
13828   Item *and_level;
13829   Item_bool_func2 *cmp_func;
COND_CMP(Item * a,Item_bool_func2 * b)13830   COND_CMP(Item *a,Item_bool_func2 *b) :and_level(a),cmp_func(b) {}
13831 };
13832 
13833 /**
13834   Find the multiple equality predicate containing a field.
13835 
  The function retrieves the multiple equalities accessed through
  the cond_equal structure from the current level and up, looking for
  an equality containing field. It stops retrieval as soon as the equality
  is found and sets inherited_fl to TRUE if it is found on upper levels.
13840 
13841   @param cond_equal          multiple equalities to search in
13842   @param field               field to look for
13843   @param[out] inherited_fl   set up to TRUE if multiple equality is found
13844                              on upper levels (not on current level of
13845                              cond_equal)
13846 
13847   @return
13848     - Item_equal for the found multiple equality predicate if a success;
13849     - NULL otherwise.
13850 */
13851 
find_item_equal(COND_EQUAL * cond_equal,Field * field,bool * inherited_fl)13852 Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field,
13853                             bool *inherited_fl)
13854 {
13855   Item_equal *item= 0;
13856   bool in_upper_level= FALSE;
13857   while (cond_equal)
13858   {
13859     List_iterator_fast<Item_equal> li(cond_equal->current_level);
13860     while ((item= li++))
13861     {
13862       if (item->contains(field))
13863         goto finish;
13864     }
13865     in_upper_level= TRUE;
13866     cond_equal= cond_equal->upper_levels;
13867   }
13868   in_upper_level= FALSE;
13869 finish:
13870   *inherited_fl= in_upper_level;
13871   return item;
13872 }
13873 
13874 
13875 /**
13876   Check whether an equality can be used to build multiple equalities.
13877 
13878     This function first checks whether the equality (left_item=right_item)
13879     is a simple equality i.e. the one that equates a field with another field
13880     or a constant (field=field_item or field=const_item).
13881     If this is the case the function looks for a multiple equality
13882     in the lists referenced directly or indirectly by cond_equal inferring
13883     the given simple equality. If it doesn't find any, it builds a multiple
13884     equality that covers the predicate, i.e. the predicate can be inferred
13885     from this multiple equality.
13886     The built multiple equality could be obtained in such a way:
13887     create a binary  multiple equality equivalent to the predicate, then
13888     merge it, if possible, with one of old multiple equalities.
13889     This guarantees that the set of multiple equalities covering equality
13890     predicates will be minimal.
13891 
13892   EXAMPLE:
13893     For the where condition
13894     @code
13895       WHERE a=b AND b=c AND
13896             (b=2 OR f=e)
13897     @endcode
13898     the check_equality will be called for the following equality
13899     predicates a=b, b=c, b=2 and f=e.
13900     - For a=b it will be called with *cond_equal=(0,[]) and will transform
13901       *cond_equal into (0,[Item_equal(a,b)]).
13902     - For b=c it will be called with *cond_equal=(0,[Item_equal(a,b)])
13903       and will transform *cond_equal into CE=(0,[Item_equal(a,b,c)]).
13904     - For b=2 it will be called with *cond_equal=(ptr(CE),[])
13905       and will transform *cond_equal into (ptr(CE),[Item_equal(2,a,b,c)]).
13906     - For f=e it will be called with *cond_equal=(ptr(CE), [])
13907       and will transform *cond_equal into (ptr(CE),[Item_equal(f,e)]).
13908 
13909   @note
13910     Now only fields that have the same type definitions (verified by
13911     the Field::eq_def method) are placed to the same multiple equalities.
13912     Because of this some equality predicates are not eliminated and
13913     can be used in the constant propagation procedure.
    We could weaken the equality test as soon as at least one of the
    equal fields is to be equal to a constant. It would require a
    more complicated implementation: we would have to store, in
    the general case, its own constant for each field from the multiple
    equality. But at the same time it would allow us to get rid
    of constant propagation completely: it would be done by the call
    to cond->build_equal_items().
13921 
13922 
13923     The implementation does not follow exactly the above rules to
13924     build a new multiple equality for the equality predicate.
    If it processes the equality of the form field1=field2, it
    looks for multiple equalities me1 containing field1 and me2 containing
    field2. If only one of them is found the function expands it with
    the lacking field. If multiple equalities for both fields are
    found they are merged. If both searches fail a new multiple equality
    containing just field1 and field2 is added to the existing
    multiple equalities.
13932     If the function processes the predicate of the form field1=const,
13933     it looks for a multiple equality containing field1. If found, the
13934     function checks the constant of the multiple equality. If the value
13935     is unknown, it is setup to const. Otherwise the value is compared with
13936     const and the evaluation of the equality predicate is performed.
13937     When expanding/merging equality predicates from the upper levels
13938     the function first copies them for the current level. It looks
13939     acceptable, as this happens rarely. The implementation without
13940     copying would be much more complicated.
13941 
13942     For description of how equality propagation works with SJM nests, grep
13943     for EqualityPropagationAndSjmNests.
13944 
  @param thd         thread handle; its mem_root is used for allocations
  @param ctx         comparison context, passed to
                     Field::get_equal_const_item() for field=const predicates
  @param left_item   left term of the equality to be checked
  @param right_item  right term of the equality to be checked
  @param cond_equal  multiple equalities that must hold together with the
                     equality
13952 
13953   @retval
13954     TRUE    if the predicate is a simple equality predicate to be used
13955     for building multiple equalities
13956   @retval
13957     FALSE   otherwise
13958 */
13959 
check_simple_equality(THD * thd,const Item::Context & ctx,Item * left_item,Item * right_item,COND_EQUAL * cond_equal)13960 static bool check_simple_equality(THD *thd, const Item::Context &ctx,
13961                                   Item *left_item, Item *right_item,
13962                                   COND_EQUAL *cond_equal)
13963 {
13964   Item *orig_left_item= left_item;
13965   Item *orig_right_item= right_item;
13966   if (left_item->type() == Item::REF_ITEM &&
13967       ((Item_ref*)left_item)->ref_type() == Item_ref::VIEW_REF)
13968   {
13969     if (((Item_ref*)left_item)->get_depended_from())
13970       return FALSE;
13971     if (((Item_direct_view_ref*)left_item)->get_null_ref_table() !=
13972         NO_NULL_TABLE && !left_item->real_item()->used_tables())
13973       return FALSE;
13974     left_item= left_item->real_item();
13975   }
13976   if (right_item->type() == Item::REF_ITEM &&
13977       ((Item_ref*)right_item)->ref_type() == Item_ref::VIEW_REF)
13978   {
13979     if (((Item_ref*)right_item)->get_depended_from())
13980       return FALSE;
13981     if (((Item_direct_view_ref*)right_item)->get_null_ref_table() !=
13982         NO_NULL_TABLE && !right_item->real_item()->used_tables())
13983       return FALSE;
13984     right_item= right_item->real_item();
13985   }
13986   if (left_item->type() == Item::FIELD_ITEM &&
13987       right_item->type() == Item::FIELD_ITEM &&
13988       !((Item_field*)left_item)->get_depended_from() &&
13989       !((Item_field*)right_item)->get_depended_from())
13990   {
13991     /* The predicate the form field1=field2 is processed */
13992 
13993     Field *left_field= ((Item_field*) left_item)->field;
13994     Field *right_field= ((Item_field*) right_item)->field;
13995 
13996     if (!left_field->eq_def(right_field))
13997       return FALSE;
13998 
13999     /* Search for multiple equalities containing field1 and/or field2 */
14000     bool left_copyfl, right_copyfl;
14001     Item_equal *left_item_equal=
14002                find_item_equal(cond_equal, left_field, &left_copyfl);
14003     Item_equal *right_item_equal=
14004                find_item_equal(cond_equal, right_field, &right_copyfl);
14005 
14006     /* As (NULL=NULL) != TRUE we can't just remove the predicate f=f */
14007     if (left_field->eq(right_field)) /* f = f */
14008       return (!(left_field->maybe_null() && !left_item_equal));
14009 
14010     if (left_item_equal && left_item_equal == right_item_equal)
14011     {
14012       /*
14013         The equality predicate is inference of one of the existing
14014         multiple equalities, i.e the condition is already covered
14015         by upper level equalities
14016       */
14017        return TRUE;
14018     }
14019 
14020     /* Copy the found multiple equalities at the current level if needed */
14021     if (left_copyfl)
14022     {
14023       /* left_item_equal of an upper level contains left_item */
14024       left_item_equal= new (thd->mem_root) Item_equal(thd, left_item_equal);
14025       left_item_equal->set_context_field(((Item_field*) left_item));
14026       cond_equal->current_level.push_back(left_item_equal, thd->mem_root);
14027     }
14028     if (right_copyfl)
14029     {
14030       /* right_item_equal of an upper level contains right_item */
14031       right_item_equal= new (thd->mem_root) Item_equal(thd, right_item_equal);
14032       right_item_equal->set_context_field(((Item_field*) right_item));
14033       cond_equal->current_level.push_back(right_item_equal, thd->mem_root);
14034     }
14035 
14036     if (left_item_equal)
14037     {
14038       /* left item was found in the current or one of the upper levels */
14039       if (! right_item_equal)
14040         left_item_equal->add(orig_right_item, thd->mem_root);
14041       else
14042       {
14043         /* Merge two multiple equalities forming a new one */
14044         left_item_equal->merge(thd, right_item_equal);
14045         /* Remove the merged multiple equality from the list */
14046         List_iterator<Item_equal> li(cond_equal->current_level);
14047         while ((li++) != right_item_equal) ;
14048         li.remove();
14049       }
14050     }
14051     else
14052     {
14053       /* left item was not found neither the current nor in upper levels  */
14054       if (right_item_equal)
14055         right_item_equal->add(orig_left_item, thd->mem_root);
14056       else
14057       {
14058         /* None of the fields was found in multiple equalities */
14059         Type_handler_hybrid_field_type
14060           tmp(orig_left_item->type_handler_for_comparison());
14061         if (tmp.aggregate_for_comparison(orig_right_item->
14062                                          type_handler_for_comparison()))
14063           return false;
14064         Item_equal *item_equal=
14065           new (thd->mem_root) Item_equal(thd, tmp.type_handler(),
14066                                          orig_left_item, orig_right_item,
14067                                          false);
14068         item_equal->set_context_field((Item_field*)left_item);
14069         cond_equal->current_level.push_back(item_equal, thd->mem_root);
14070       }
14071     }
14072     return TRUE;
14073   }
14074 
14075   {
14076     /* The predicate of the form field=const/const=field is processed */
14077     Item *const_item= 0;
14078     Item_field *field_item= 0;
14079     Item *orig_field_item= 0;
14080     if (left_item->type() == Item::FIELD_ITEM &&
14081         !((Item_field*)left_item)->get_depended_from() &&
14082         right_item->const_item() && !right_item->is_expensive())
14083     {
14084       orig_field_item= orig_left_item;
14085       field_item= (Item_field *) left_item;
14086       const_item= right_item;
14087     }
14088     else if (right_item->type() == Item::FIELD_ITEM &&
14089              !((Item_field*)right_item)->get_depended_from() &&
14090              left_item->const_item() && !left_item->is_expensive())
14091     {
14092       orig_field_item= orig_right_item;
14093       field_item= (Item_field *) right_item;
14094       const_item= left_item;
14095     }
14096 
14097     if (const_item &&
14098         field_item->field->test_if_equality_guarantees_uniqueness(const_item))
14099     {
14100       /*
14101         field_item and const_item are arguments of a scalar or a row
14102         comparison function:
14103           WHERE column=constant
14104           WHERE (column, ...) = (constant, ...)
14105 
14106         The owner comparison function has previously called fix_fields(),
14107         so field_item and const_item should be directly comparable items,
14108         field_item->cmp_context and const_item->cmp_context should be set.
14109         In case of string comparison, charsets and collations of
14110         field_item and const_item should have already be aggregated
14111         for comparison, all necessary character set converters installed
14112         and fixed.
14113 
14114         In case of string comparison, const_item can be either:
14115         - a weaker constant that does not need to be converted to field_item:
14116             WHERE latin1_field = 'latin1_const'
14117             WHERE varbinary_field = 'latin1_const'
14118             WHERE latin1_bin_field = 'latin1_general_ci_const'
14119         - a stronger constant that does not need to be converted to field_item:
14120             WHERE latin1_field = binary 0xDF
14121             WHERE latin1_field = 'a' COLLATE latin1_bin
14122         - a result of conversion (e.g. from the session character set)
14123           to the character set of field_item:
14124             WHERE latin1_field = 'utf8_string_with_latin1_repertoire'
14125       */
14126       bool copyfl;
14127 
14128       Item_equal *item_equal = find_item_equal(cond_equal,
14129                                                field_item->field, &copyfl);
14130       if (copyfl)
14131       {
14132         item_equal= new (thd->mem_root) Item_equal(thd, item_equal);
14133         cond_equal->current_level.push_back(item_equal, thd->mem_root);
14134         item_equal->set_context_field(field_item);
14135       }
14136       Item *const_item2= field_item->field->get_equal_const_item(thd, ctx,
14137                                                                  const_item);
14138       if (!const_item2)
14139         return false;
14140 
14141       if (item_equal)
14142       {
14143         /*
14144           The flag cond_false will be set to 1 after this, if item_equal
14145           already contains a constant and its value is  not equal to
14146           the value of const_item.
14147         */
14148         item_equal->add_const(thd, const_item2);
14149       }
14150       else
14151       {
14152         Type_handler_hybrid_field_type
14153           tmp(orig_left_item->type_handler_for_comparison());
14154         if (tmp.aggregate_for_comparison(orig_right_item->
14155                                          type_handler_for_comparison()))
14156           return false;
14157         item_equal= new (thd->mem_root) Item_equal(thd, tmp.type_handler(),
14158                                                    const_item2,
14159                                                    orig_field_item, true);
14160         item_equal->set_context_field(field_item);
14161         cond_equal->current_level.push_back(item_equal, thd->mem_root);
14162       }
14163       return TRUE;
14164     }
14165   }
14166   return FALSE;
14167 }
14168 
14169 
14170 /**
14171   Convert row equalities into a conjunction of regular equalities.
14172 
    The function converts a row equality of the form (E1,...,En)=(E'1,...,E'n)
    into a list of equalities E1=E'1,...,En=E'n. For each of these equalities
    Ei=E'i the function checks whether it is a simple equality or a row
    equality. If it is a simple equality it is used to expand multiple
    equalities of cond_equal. If it is a row equality it is converted to a
    sequence of equalities between row elements. If Ei=E'i is neither a
    simple equality nor a row equality the item for this predicate is added
    to eq_list.
14181 
  @param thd         thread handle
  @param comparators array of comparators, one per row element; element i
                     supplies the comparison type handler and collation
                     for Ei=E'i, and its subcomparators for nested rows
  @param left_row    left term of the row equality to be processed
  @param right_row   right term of the row equality to be processed
  @param cond_equal  multiple equalities that must hold together with the
                     predicate
  @param eq_list     results of conversions of row equalities that are not
                     simple enough to form multiple equalities
14189 
14190   @retval
14191     TRUE    if conversion has succeeded (no fatal error)
14192   @retval
14193     FALSE   otherwise
14194 */
14195 
check_row_equality(THD * thd,const Arg_comparator * comparators,Item * left_row,Item_row * right_row,COND_EQUAL * cond_equal,List<Item> * eq_list)14196 static bool check_row_equality(THD *thd, const Arg_comparator *comparators,
14197                                Item *left_row, Item_row *right_row,
14198                                COND_EQUAL *cond_equal, List<Item>* eq_list)
14199 {
14200   uint n= left_row->cols();
14201   for (uint i= 0 ; i < n; i++)
14202   {
14203     bool is_converted;
14204     Item *left_item= left_row->element_index(i);
14205     Item *right_item= right_row->element_index(i);
14206     if (left_item->type() == Item::ROW_ITEM &&
14207         right_item->type() == Item::ROW_ITEM)
14208     {
14209       /*
14210         Item_splocal for ROW SP variables return Item::ROW_ITEM.
14211         Here we know that left_item and right_item are not Item_splocal,
14212         because ROW SP variables with nested ROWs are not supported yet.
14213         It's safe to cast left_item and right_item to Item_row.
14214       */
14215       DBUG_ASSERT(!left_item->get_item_splocal());
14216       DBUG_ASSERT(!right_item->get_item_splocal());
14217       is_converted= check_row_equality(thd,
14218                                        comparators[i].subcomparators(),
14219                                        (Item_row *) left_item,
14220                                        (Item_row *) right_item,
14221 			               cond_equal, eq_list);
14222     }
14223     else
14224     {
14225       const Arg_comparator *tmp= &comparators[i];
14226       is_converted= check_simple_equality(thd,
14227                                           Item::Context(Item::ANY_SUBST,
14228                                                   tmp->compare_type_handler(),
14229                                                   tmp->compare_collation()),
14230                                           left_item, right_item,
14231                                           cond_equal);
14232     }
14233 
14234     if (!is_converted)
14235     {
14236       Item_func_eq *eq_item;
14237       if (!(eq_item= new (thd->mem_root) Item_func_eq(thd, left_item, right_item)) ||
14238           eq_item->set_cmp_func())
14239         return FALSE;
14240       eq_item->quick_fix_field();
14241       eq_list->push_back(eq_item, thd->mem_root);
14242     }
14243   }
14244   return TRUE;
14245 }
14246 
14247 
14248 /**
14249   Eliminate row equalities and form multiple equalities predicates.
14250 
14251     This function checks whether the item is a simple equality
14252     i.e. the one that equates a field with another field or a constant
14253     (field=field_item or field=constant_item), or, a row equality.
14254     For a simple equality the function looks for a multiple equality
14255     in the lists referenced directly or indirectly by cond_equal inferring
14256     the given simple equality. If it doesn't find any, it builds/expands
14257     multiple equality that covers the predicate.
    Row equalities are eliminated by replacing them with conjunctions of
    regular equalities, which are treated in the same way as the original
    equality predicates.
14261 
14262   @param thd        thread handle
14263   @param item       predicate to process
14264   @param cond_equal multiple equalities that must hold together with the
14265                     predicate
14266   @param eq_list    results of conversions of row equalities that are not
14267                     simple enough to form multiple equalities
14268 
14269   @retval
14270     TRUE   if re-writing rules have been applied
14271   @retval
14272     FALSE  otherwise, i.e.
14273            if the predicate is not an equality,
14274            or, if the equality is neither a simple one nor a row equality,
14275            or, if the procedure fails by a fatal error.
14276 */
14277 
check_equality(THD * thd,COND_EQUAL * cond_equal,List<Item> * eq_list)14278 bool Item_func_eq::check_equality(THD *thd, COND_EQUAL *cond_equal,
14279                                   List<Item> *eq_list)
14280 {
14281   Item *left_item= arguments()[0];
14282   Item *right_item= arguments()[1];
14283 
14284   if (left_item->type() == Item::ROW_ITEM &&
14285       right_item->type() == Item::ROW_ITEM)
14286   {
14287     /*
14288       Item_splocal::type() for ROW variables returns Item::ROW_ITEM.
14289       Distinguish ROW-type Item_splocal from Item_row.
14290       Example query:
14291         SELECT 1 FROM DUAL WHERE row_sp_variable=ROW(100,200);
14292     */
14293     if (left_item->get_item_splocal() ||
14294         right_item->get_item_splocal())
14295       return false;
14296     return check_row_equality(thd,
14297                               cmp.subcomparators(),
14298                               (Item_row *) left_item,
14299                               (Item_row *) right_item,
14300                               cond_equal, eq_list);
14301   }
14302   return check_simple_equality(thd,
14303                                Context(ANY_SUBST,
14304                                        compare_type_handler(),
14305                                        compare_collation()),
14306                                left_item, right_item, cond_equal);
14307 }
14308 
14309 
14310 /**
14311   Item_xxx::build_equal_items()
14312 
14313   Replace all equality predicates in a condition referenced by "this"
14314   by multiple equality items.
14315 
    At each 'and' level the function detects items for equality predicates
    and replaces them by a set of multiple equality items of class Item_equal,
    taking into account inherited equalities from upper levels.
    If an equality predicate is used not in a conjunction it's just
    replaced by a multiple equality predicate.
    For each 'and' level the function sets a pointer to the inherited
    multiple equalities in the cond_equal field of the associated
    object of the type Item_cond_and.
    The function also traverses the cond tree and for each field reference
14325     sets a pointer to the multiple equality item containing the field, if there
14326     is any. If this multiple equality equates fields to a constant the
14327     function replaces the field reference by the constant in the cases
14328     when the field is not of a string type or when the field reference is
14329     just an argument of a comparison predicate.
14330     The function also determines the maximum number of members in
14331     equality lists of each Item_cond_and object assigning it to
14332     thd->lex->current_select->max_equal_elems.
14333 
14334   @note
    Multiple equality predicate =(f1,..fn) is equivalent to the conjunction of
    f1=f2, .., fn-1=fn. It substitutes any inference from these
    equality predicates that is equivalent to the conjunction.
    Thus, =(a1,a2,a3) can substitute for ((a1=a3) AND (a2=a3) AND (a2=a1)) as
    it is equivalent to ((a1=a2) AND (a2=a3)).
    The function always makes a substitution of all equality predicates occurred
    in a conjunction for a minimal set of multiple equality predicates.
    This set can be considered as a canonical representation of the
    sub-conjunction of the equality predicates.
    E.g. (t1.a=t2.b AND t2.b>5 AND t1.a=t3.c) is replaced by
    (=(t1.a,t2.b,t3.c) AND t2.b>5), not by
    (=(t1.a,t2.b) AND =(t1.a,t3.c) AND t2.b>5);
    while (t1.a=t2.b AND t2.b>5 AND t3.c=t4.d) is replaced by
    (=(t1.a,t2.b) AND =(t3.c,t4.d) AND t2.b>5),
    but if additionally =(t4.d,t2.b) is inherited, it
    will be replaced by (=(t1.a,t2.b,t3.c,t4.d) AND t2.b>5)
14351 
    The function performs the substitution in a recursive descent by
    the condition tree, passing to the next AND level a chain of multiple
    equality predicates which have been built at the upper levels.
    The Item_equal items built at the level are attached to other
    non-equality conjuncts as a sublist. The pointer to the inherited
    multiple equalities is saved in the and condition object (Item_cond_and).
    This chain allows us, for any field reference occurrence, to easily find
    a multiple equality that must be held for this occurrence.
14360     For each AND level we do the following:
14361     - scan it for all equality predicate (=) items
14362     - join them into disjoint Item_equal() groups
14363     - process the included OR conditions recursively to do the same for
14364       lower AND levels.
14365 
14366     We need to do things in this order as lower AND levels need to know about
14367     all possible Item_equal objects in upper levels.
14368 
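  @param thd               thread handle
  @param inherited         path to all inherited multiple equality items
  @param link_item_fields  passed to Item_equal::set_link_equal_fields()
                           for every multiple equality built at this level
  @param cond_equal_ref    if not NULL, set to point to the COND_EQUAL of
                           this AND item when the level has been processed
                           (it is left untouched on the early returns)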
14371 
14372   @return
14373     pointer to the transformed condition,
14374     whose Used_tables_and_const_cache is up to date,
14375     so no additional update_used_tables() is needed on the result.
14376 */
14377 
build_equal_items(THD * thd,COND_EQUAL * inherited,bool link_item_fields,COND_EQUAL ** cond_equal_ref)14378 COND *Item_cond_and::build_equal_items(THD *thd,
14379                                        COND_EQUAL *inherited,
14380                                        bool link_item_fields,
14381                                        COND_EQUAL **cond_equal_ref)
14382 {
14383   Item_equal *item_equal;
14384   COND_EQUAL cond_equal;
14385   cond_equal.upper_levels= inherited;
14386 
14387   if (check_stack_overrun(thd, STACK_MIN_SIZE, NULL))
14388     return this;                          // Fatal error flag is set!
14389 
14390   List<Item> eq_list;
14391   List<Item> *cond_args= argument_list();
14392 
14393   List_iterator<Item> li(*cond_args);
14394   Item *item;
14395 
14396   DBUG_ASSERT(!cond_equal_ref || !cond_equal_ref[0]);
14397   /*
14398      Retrieve all conjuncts of this level detecting the equality
14399      that are subject to substitution by multiple equality items and
14400      removing each such predicate from the conjunction after having
14401      found/created a multiple equality whose inference the predicate is.
14402  */
14403   while ((item= li++))
14404   {
14405     /*
14406       PS/SP note: we can safely remove a node from AND-OR
14407       structure here because it's restored before each
14408       re-execution of any prepared statement/stored procedure.
14409     */
14410     if (item->check_equality(thd, &cond_equal, &eq_list))
14411       li.remove();
14412   }
14413 
14414   /*
14415     Check if we eliminated all the predicates of the level, e.g.
14416     (a=a AND b=b AND a=a).
14417   */
14418   if (!cond_args->elements &&
14419       !cond_equal.current_level.elements &&
14420       !eq_list.elements)
14421     return new (thd->mem_root) Item_int(thd, (longlong) 1, 1);
14422 
14423   List_iterator_fast<Item_equal> it(cond_equal.current_level);
14424   while ((item_equal= it++))
14425   {
14426     item_equal->set_link_equal_fields(link_item_fields);
14427     item_equal->fix_fields(thd, NULL);
14428     item_equal->update_used_tables();
14429     set_if_bigger(thd->lex->current_select->max_equal_elems,
14430                   item_equal->n_field_items());
14431   }
14432 
14433   m_cond_equal.copy(cond_equal);
14434   cond_equal.current_level= m_cond_equal.current_level;
14435   inherited= &m_cond_equal;
14436 
14437   /*
14438      Make replacement of equality predicates for lower levels
14439      of the condition expression.
14440   */
14441   li.rewind();
14442   while ((item= li++))
14443   {
14444     Item *new_item;
14445     if ((new_item= item->build_equal_items(thd, inherited, false, NULL))
14446         != item)
14447     {
14448       /* This replacement happens only for standalone equalities */
14449       /*
14450         This is ok with PS/SP as the replacement is done for
14451         cond_args of an AND/OR item, which are restored for each
14452         execution of PS/SP.
14453       */
14454       li.replace(new_item);
14455     }
14456   }
14457   cond_args->append(&eq_list);
14458   cond_args->append((List<Item> *)&cond_equal.current_level);
14459   update_used_tables();
14460   if (cond_equal_ref)
14461     *cond_equal_ref= &m_cond_equal;
14462   return this;
14463 }
14464 
14465 
build_equal_items(THD * thd,COND_EQUAL * inherited,bool link_item_fields,COND_EQUAL ** cond_equal_ref)14466 COND *Item_cond::build_equal_items(THD *thd,
14467                                    COND_EQUAL *inherited,
14468                                    bool link_item_fields,
14469                                    COND_EQUAL **cond_equal_ref)
14470 {
14471   List<Item> *cond_args= argument_list();
14472 
14473   List_iterator<Item> li(*cond_args);
14474   Item *item;
14475 
14476   DBUG_ASSERT(!cond_equal_ref || !cond_equal_ref[0]);
14477   /*
14478      Make replacement of equality predicates for lower levels
14479      of the condition expression.
14480      Update used_tables_cache and const_item_cache on the way.
14481   */
14482   used_tables_and_const_cache_init();
14483   while ((item= li++))
14484   {
14485     Item *new_item;
14486     if ((new_item= item->build_equal_items(thd, inherited, false, NULL))
14487         != item)
14488     {
14489       /* This replacement happens only for standalone equalities */
14490       /*
14491         This is ok with PS/SP as the replacement is done for
14492         arguments of an AND/OR item, which are restored for each
14493         execution of PS/SP.
14494       */
14495       li.replace(new_item);
14496     }
14497     used_tables_and_const_cache_join(new_item);
14498   }
14499   return this;
14500 }
14501 
14502 
build_equal_items(THD * thd,COND_EQUAL * inherited,bool link_item_fields,COND_EQUAL ** cond_equal_ref)14503 COND *Item_func_eq::build_equal_items(THD *thd,
14504                                       COND_EQUAL *inherited,
14505                                       bool link_item_fields,
14506                                       COND_EQUAL **cond_equal_ref)
14507 {
14508   COND_EQUAL cond_equal;
14509   cond_equal.upper_levels= inherited;
14510   List<Item> eq_list;
14511 
14512   DBUG_ASSERT(!cond_equal_ref || !cond_equal_ref[0]);
14513   /*
14514     If an equality predicate forms the whole and level,
14515     we call it standalone equality and it's processed here.
14516     E.g. in the following where condition
14517     WHERE a=5 AND (b=5 or a=c)
14518     (b=5) and (a=c) are standalone equalities.
14519     In general we can't leave alone standalone eqalities:
14520     for WHERE a=b AND c=d AND (b=c OR d=5)
14521     b=c is replaced by =(a,b,c,d).
14522    */
14523   if (Item_func_eq::check_equality(thd, &cond_equal, &eq_list))
14524   {
14525     Item_equal *item_equal;
14526     int n= cond_equal.current_level.elements + eq_list.elements;
14527     if (n == 0)
14528       return new (thd->mem_root) Item_int(thd, (longlong) 1, 1);
14529     else if (n == 1)
14530     {
14531       if ((item_equal= cond_equal.current_level.pop()))
14532       {
14533         item_equal->fix_fields(thd, NULL);
14534         item_equal->update_used_tables();
14535         set_if_bigger(thd->lex->current_select->max_equal_elems,
14536                       item_equal->n_field_items());
14537         item_equal->upper_levels= inherited;
14538         if (cond_equal_ref)
14539           *cond_equal_ref= new (thd->mem_root) COND_EQUAL(item_equal,
14540                                                           thd->mem_root);
14541         return item_equal;
14542       }
14543       Item *res= eq_list.pop();
14544       res->update_used_tables();
14545       DBUG_ASSERT(res->type() == FUNC_ITEM);
14546       return res;
14547     }
14548     else
14549     {
14550       /*
14551         Here a new AND level must be created. It can happen only
14552         when a row equality is processed as a standalone predicate.
14553       */
14554       Item_cond_and *and_cond= new (thd->mem_root) Item_cond_and(thd, eq_list);
14555       and_cond->quick_fix_field();
14556       List<Item> *cond_args= and_cond->argument_list();
14557       List_iterator_fast<Item_equal> it(cond_equal.current_level);
14558       while ((item_equal= it++))
14559       {
14560         if (item_equal->fix_length_and_dec())
14561           return NULL;
14562         item_equal->update_used_tables();
14563         set_if_bigger(thd->lex->current_select->max_equal_elems,
14564                       item_equal->n_field_items());
14565       }
14566       and_cond->m_cond_equal.copy(cond_equal);
14567       cond_equal.current_level= and_cond->m_cond_equal.current_level;
14568       cond_args->append((List<Item> *)&cond_equal.current_level);
14569       and_cond->update_used_tables();
14570       if (cond_equal_ref)
14571         *cond_equal_ref= &and_cond->m_cond_equal;
14572       return and_cond;
14573     }
14574   }
14575   return Item_func::build_equal_items(thd, inherited, link_item_fields,
14576                                       cond_equal_ref);
14577 }
14578 
14579 
build_equal_items(THD * thd,COND_EQUAL * inherited,bool link_item_fields,COND_EQUAL ** cond_equal_ref)14580 COND *Item_func::build_equal_items(THD *thd, COND_EQUAL *inherited,
14581                                    bool link_item_fields,
14582                                    COND_EQUAL **cond_equal_ref)
14583 {
14584   /*
14585     For each field reference in cond, not from equal item predicates,
14586     set a pointer to the multiple equality it belongs to (if there is any)
14587     as soon the field is not of a string type or the field reference is
14588     an argument of a comparison predicate.
14589   */
14590   COND *cond= propagate_equal_fields(thd, Context_boolean(), inherited);
14591   cond->update_used_tables();
14592   DBUG_ASSERT(cond == this);
14593   DBUG_ASSERT(!cond_equal_ref || !cond_equal_ref[0]);
14594   return cond;
14595 }
14596 
14597 
build_equal_items(THD * thd,COND_EQUAL * inherited,bool link_item_fields,COND_EQUAL ** cond_equal_ref)14598 COND *Item_equal::build_equal_items(THD *thd, COND_EQUAL *inherited,
14599                                     bool link_item_fields,
14600                                     COND_EQUAL **cond_equal_ref)
14601 {
14602   COND *cond= Item_func::build_equal_items(thd, inherited, link_item_fields,
14603                                            cond_equal_ref);
14604   if (cond_equal_ref)
14605     *cond_equal_ref= new (thd->mem_root) COND_EQUAL(this, thd->mem_root);
14606   return cond;
14607 }
14608 
14609 
14610 /**
14611   Build multiple equalities for a condition and all on expressions that
14612   inherit these multiple equalities.
14613 
14614     The function first applies the cond->build_equal_items() method
14615     to build all multiple equalities for condition cond utilizing equalities
14616     referred through the parameter inherited. The extended set of
14617     equalities is returned in the structure referred by the cond_equal_ref
14618     parameter. After this the function calls itself recursively for
14619     all on expressions whose direct references can be found in join_list
14620     and who inherit directly the multiple equalities just having built.
14621 
14622   @note
14623     The on expression used in an outer join operation inherits all equalities
14624     from the on expression of the embedding join, if there is any, or
14625     otherwise - from the where condition.
14626     This fact is not obvious, but presumably can be proved.
14627     Consider the following query:
14628     @code
14629       SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t1.a=t3.a AND t2.a=t4.a
14630         WHERE t1.a=t2.a;
14631     @endcode
14632     If the on expression in the query inherits =(t1.a,t2.a), then we
14633     can build the multiple equality =(t1.a,t2.a,t3.a,t4.a) that infers
14634     the equality t3.a=t4.a. Although the on expression
14635     t1.a=t3.a AND t2.a=t4.a AND t3.a=t4.a is not equivalent to the one
14636     in the query the latter can be replaced by the former: the new query
14637     will return the same result set as the original one.
14638 
14639     Interesting that multiple equality =(t1.a,t2.a,t3.a,t4.a) allows us
14640     to use t1.a=t3.a AND t3.a=t4.a under the on condition:
14641     @code
14642       SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t1.a=t3.a AND t3.a=t4.a
14643         WHERE t1.a=t2.a
14644     @endcode
14645     This query equivalent to:
14646     @code
14647       SELECT * FROM (t1 LEFT JOIN (t3,t4) ON t1.a=t3.a AND t3.a=t4.a),t2
14648         WHERE t1.a=t2.a
14649     @endcode
14650     Similarly the original query can be rewritten to the query:
14651     @code
14652       SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t2.a=t4.a AND t3.a=t4.a
14653         WHERE t1.a=t2.a
14654     @endcode
14655     that is equivalent to:
14656     @code
14657       SELECT * FROM (t2 LEFT JOIN (t3,t4)ON t2.a=t4.a AND t3.a=t4.a), t1
14658         WHERE t1.a=t2.a
14659     @endcode
14660     Thus, applying equalities from the where condition we basically
14661     can get more freedom in performing join operations.
14662     Although we don't use this property now, it probably makes sense to use
14663     it in the future.
14664   @param thd		     Thread handler
14665   @param cond                condition to build the multiple equalities for
14666   @param inherited           path to all inherited multiple equality items
14667   @param join_list           list of join tables to which the condition
14668                              refers to
  @param ignore_on_conds     TRUE <-> do not build multiple equalities
                             for on expressions
  @param[out] cond_equal_ref pointer to the structure to place built
                             equalities in
  @param link_equal_fields   equal fields are to be linked
14674 
14675   @return
14676     pointer to the transformed condition containing multiple equalities
14677 */
14678 
build_equal_items(JOIN * join,COND * cond,COND_EQUAL * inherited,List<TABLE_LIST> * join_list,bool ignore_on_conds,COND_EQUAL ** cond_equal_ref,bool link_equal_fields)14679 static COND *build_equal_items(JOIN *join, COND *cond,
14680                                COND_EQUAL *inherited,
14681                                List<TABLE_LIST> *join_list,
14682                                bool ignore_on_conds,
14683                                COND_EQUAL **cond_equal_ref,
14684                                bool link_equal_fields)
14685 {
14686   THD *thd= join->thd;
14687 
14688   *cond_equal_ref= NULL;
14689 
14690   if (cond)
14691   {
14692     cond= cond->build_equal_items(thd, inherited, link_equal_fields,
14693                                   cond_equal_ref);
14694     if (*cond_equal_ref)
14695     {
14696       (*cond_equal_ref)->upper_levels= inherited;
14697       inherited= *cond_equal_ref;
14698     }
14699   }
14700 
14701   if (join_list && !ignore_on_conds)
14702   {
14703     TABLE_LIST *table;
14704     List_iterator<TABLE_LIST> li(*join_list);
14705 
14706     while ((table= li++))
14707     {
14708       if (table->on_expr)
14709       {
14710         List<TABLE_LIST> *nested_join_list= table->nested_join ?
14711           &table->nested_join->join_list : NULL;
14712         /*
14713           We can modify table->on_expr because its old value will
14714           be restored before re-execution of PS/SP.
14715         */
14716         table->on_expr= build_equal_items(join, table->on_expr, inherited,
14717                                           nested_join_list, ignore_on_conds,
14718                                           &table->cond_equal);
14719       }
14720     }
14721   }
14722 
14723   return cond;
14724 }
14725 
14726 
14727 /**
14728   Compare field items by table order in the execution plan.
14729 
14730     If field1 and field2 belong to different tables then
14731     field1 considered as better than field2 if the table containing
14732     field1 is accessed earlier than the table containing field2.
14733     The function finds out what of two fields is better according
14734     this criteria.
14735     If field1 and field2 belong to the same table then the result
14736     of comparison depends on whether the fields are parts of
14737     the key that are used to access this table.
14738 
14739   @param field1          first field item to compare
14740   @param field2          second field item to compare
14741   @param table_join_idx  index to tables determining table order
14742 
  @retval
    -1  if field1 is better than field2
  @retval
    1  if field2 is better than field1
  @retval
    0  otherwise
14749 */
14750 
compare_fields_by_table_order(Item * field1,Item * field2,void * table_join_idx)14751 static int compare_fields_by_table_order(Item *field1,
14752                                          Item *field2,
14753                                          void *table_join_idx)
14754 {
14755   int cmp= 0;
14756   bool outer_ref= 0;
14757   Item *field1_real= field1->real_item();
14758   Item *field2_real= field2->real_item();
14759 
14760   if (field1->const_item() || field1_real->const_item())
14761     return -1;
14762   if (field2->const_item() || field2_real->const_item())
14763     return 1;
14764   Item_field *f1= (Item_field *) field1_real;
14765   Item_field *f2= (Item_field *) field2_real;
14766   if (f1->used_tables() & OUTER_REF_TABLE_BIT)
14767   {
14768     outer_ref= 1;
14769     cmp= -1;
14770   }
14771   if (f2->used_tables() & OUTER_REF_TABLE_BIT)
14772   {
14773     outer_ref= 1;
14774     cmp++;
14775   }
14776   if (outer_ref)
14777     return cmp;
14778   JOIN_TAB **idx= (JOIN_TAB **) table_join_idx;
14779 
14780   JOIN_TAB *tab1= idx[f1->field->table->tablenr];
14781   JOIN_TAB *tab2= idx[f2->field->table->tablenr];
14782 
14783   /*
14784     if one of the table is inside a merged SJM nest and another one isn't,
14785     compare SJM bush roots of the tables.
14786   */
14787   if (tab1->bush_root_tab != tab2->bush_root_tab)
14788   {
14789     if (tab1->bush_root_tab)
14790       tab1= tab1->bush_root_tab;
14791 
14792     if (tab2->bush_root_tab)
14793       tab2= tab2->bush_root_tab;
14794   }
14795 
14796   cmp= (int)(tab1 - tab2);
14797 
14798   if (!cmp)
14799   {
14800     /* Fields f1, f2 belong to the same table */
14801 
14802     JOIN_TAB *tab= idx[f1->field->table->tablenr];
14803     uint keyno= MAX_KEY;
14804     if (tab->ref.key_parts)
14805       keyno= tab->ref.key;
14806     else if (tab->select && tab->select->quick)
14807        keyno = tab->select->quick->index;
14808     if (keyno != MAX_KEY)
14809     {
14810       if (f1->field->part_of_key.is_set(keyno))
14811         cmp= -1;
14812       if (f2->field->part_of_key.is_set(keyno))
14813         cmp++;
14814       /*
14815         Here:
14816         if both f1, f2 are components of the key tab->ref.key then cmp==0,
14817         if only f1 is a component of the key then cmp==-1 (f1 is better),
14818         if only f2 is a component of the key then cmp==1, (f2 is better),
14819         if none of f1,f1 is component of the key cmp==0.
14820       */
14821       if (!cmp)
14822       {
14823         KEY *key_info= tab->table->key_info + keyno;
14824         for (uint i= 0; i < key_info->user_defined_key_parts; i++)
14825 	{
14826           Field *fld= key_info->key_part[i].field;
14827           if (fld->eq(f1->field))
14828 	  {
14829 	    cmp= -1; // f1 is better
14830             break;
14831           }
14832           if (fld->eq(f2->field))
14833 	  {
14834 	    cmp= 1;  // f2 is better
14835             break;
14836           }
14837         }
14838       }
14839     }
14840     if (!cmp)
14841       cmp= f1->field->field_index-f2->field->field_index;
14842   }
14843   return cmp < 0 ? -1 : (cmp ? 1 : 0);
14844 }
14845 
14846 
embedding_sjm(Item * item)14847 static TABLE_LIST* embedding_sjm(Item *item)
14848 {
14849   Item_field *item_field= (Item_field *) (item->real_item());
14850   TABLE_LIST *nest= item_field->field->table->pos_in_table_list->embedding;
14851   if (nest && nest->sj_mat_info && nest->sj_mat_info->is_used)
14852     return nest;
14853   else
14854     return NULL;
14855 }
14856 
14857 /**
14858   Generate minimal set of simple equalities equivalent to a multiple equality.
14859 
14860     The function retrieves the fields of the multiple equality item
14861     item_equal and  for each field f:
14862     - if item_equal contains const it generates the equality f=const_item;
14863     - otherwise, if f is not the first field, generates the equality
14864       f=item_equal->get_first().
14865     All generated equality are added to the cond conjunction.
14866 
14867   @param cond            condition to add the generated equality to
14868   @param upper_levels    structure to access multiple equality of upper levels
14869   @param item_equal      multiple equality to generate simple equality from
14870 
14871   @note
14872     Before generating an equality function checks that it has not
14873     been generated for multiple equalities of the upper levels.
14874     E.g. for the following where condition
14875     WHERE a=5 AND ((a=b AND b=c) OR  c>4)
14876     the upper level AND condition will contain =(5,a),
14877     while the lower level AND condition will contain =(5,a,b,c).
14878     When splitting =(5,a,b,c) into a separate equality predicates
14879     we should omit 5=a, as we have it already in the upper level.
14880     The following where condition gives us a more complicated case:
14881     WHERE t1.a=t2.b AND t3.c=t4.d AND (t2.b=t3.c OR t4.e>5 ...) AND ...
14882     Given the tables are accessed in the order t1->t2->t3->t4 for
14883     the selected query execution plan the lower level multiple
14884     equality =(t1.a,t2.b,t3.c,t4.d) formally  should be converted to
    t1.a=t2.b AND t1.a=t3.c AND t1.a=t4.d. But t1.a=t2.b will be
    generated for the upper level. Also t3.c=t4.d will be generated there.
    So only t1.a=t3.c should be left in the lower level.
    If cond is equal to 0, then no more than one equality is generated
    and a pointer to it is returned as the result of the function.
14890 
    Equality substitution and semi-join materialization nests:
14892 
14893        In case join order looks like this:
14894 
14895           outer_tbl1 outer_tbl2 SJM (inner_tbl1 inner_tbl2) outer_tbl3
14896 
14897         We must not construct equalities like
14898 
14899            outer_tbl1.col = inner_tbl1.col
14900 
14901         because they would get attached to inner_tbl1 and will get evaluated
14902         during materialization phase, when we don't have current value of
14903         outer_tbl1.col.
14904 
        Item_equal::get_first() also takes similar measures for dealing with
        equality substitution in the presence of SJM nests.
14907 
14908     Grep for EqualityPropagationAndSjmNests for a more verbose description.
14909 
14910   @return
14911     - The condition with generated simple equalities or
14912     a pointer to the simple generated equality, if success.
14913     - 0, otherwise.
14914 */
14915 
eliminate_item_equal(THD * thd,COND * cond,COND_EQUAL * upper_levels,Item_equal * item_equal)14916 Item *eliminate_item_equal(THD *thd, COND *cond, COND_EQUAL *upper_levels,
14917                            Item_equal *item_equal)
14918 {
14919   List<Item> eq_list;
14920   Item_func_eq *eq_item= 0;
14921   if (((Item *) item_equal)->const_item() && !item_equal->val_int())
14922     return new (thd->mem_root) Item_int(thd, (longlong) 0, 1);
14923   Item *item_const= item_equal->get_const();
14924   Item_equal_fields_iterator it(*item_equal);
14925   Item *head;
14926   TABLE_LIST *current_sjm= NULL;
14927   Item *current_sjm_head= NULL;
14928 
14929   DBUG_ASSERT(!cond ||
14930               cond->type() == Item::INT_ITEM ||
14931               (cond->type() == Item::FUNC_ITEM &&
14932                ((Item_func *) cond)->functype() == Item_func::EQ_FUNC) ||
14933               (cond->type() == Item::COND_ITEM  &&
14934                ((Item_func *) cond)->functype() == Item_func::COND_AND_FUNC));
14935 
14936   /*
14937     Pick the "head" item: the constant one or the first in the join order
14938     (if the first in the join order happends to be inside an SJM nest, that's
14939     ok, because this is where the value will be unpacked after
14940     materialization).
14941   */
14942   if (item_const)
14943     head= item_const;
14944   else
14945   {
14946     TABLE_LIST *emb_nest;
14947     head= item_equal->get_first(NO_PARTICULAR_TAB, NULL);
14948     it++;
14949     if ((emb_nest= embedding_sjm(head)))
14950     {
14951       current_sjm= emb_nest;
14952       current_sjm_head= head;
14953     }
14954   }
14955 
14956   Item *field_item;
14957   /*
14958     For each other item, generate "item=head" equality (except the tables that
14959     are within SJ-Materialization nests, for those "head" is defined
14960     differently)
14961   */
14962   while ((field_item= it++))
14963   {
14964     Item_equal *upper= field_item->find_item_equal(upper_levels);
14965     Item *item= field_item;
14966     TABLE_LIST *field_sjm= embedding_sjm(field_item);
14967     if (!field_sjm)
14968     {
14969       current_sjm= NULL;
14970       current_sjm_head= NULL;
14971     }
14972 
14973     /*
14974       Check if "field_item=head" equality is already guaranteed to be true
14975       on upper AND-levels.
14976     */
14977     if (upper)
14978     {
14979       TABLE_LIST *native_sjm= embedding_sjm(item_equal->context_field);
14980       Item *upper_const= upper->get_const();
14981       if (item_const && upper_const)
14982       {
14983         /*
14984           Upper item also has "field_item=const".
14985           Don't produce equality if const is equal to item_const.
14986         */
14987         Item_func_eq *func= new (thd->mem_root) Item_func_eq(thd, item_const, upper_const);
14988         func->set_cmp_func();
14989         func->quick_fix_field();
14990         if (func->val_int())
14991           item= 0;
14992       }
14993       else
14994       {
14995         Item_equal_fields_iterator li(*item_equal);
14996         while ((item= li++) != field_item)
14997         {
14998           if (embedding_sjm(item) == field_sjm &&
14999               item->find_item_equal(upper_levels) == upper)
15000             break;
15001         }
15002       }
15003       if (embedding_sjm(field_item) != native_sjm)
15004         item= NULL; /* Don't produce equality */
15005     }
15006 
15007     bool produce_equality= MY_TEST(item == field_item);
15008     if (!item_const && field_sjm && field_sjm != current_sjm)
15009     {
15010       /* Entering an SJM nest */
15011       current_sjm_head= field_item;
15012       if (!field_sjm->sj_mat_info->is_sj_scan)
15013         produce_equality= FALSE;
15014     }
15015 
15016     if (produce_equality)
15017     {
15018       if (eq_item && eq_list.push_back(eq_item, thd->mem_root))
15019         return 0;
15020 
15021       /*
15022         If we're inside an SJM-nest (current_sjm!=NULL), and the multi-equality
15023         doesn't include a constant, we should produce equality with the first
15024         of the equal items in this SJM (except for the first element inside the
15025         SJM. For that, we produce the equality with the "head" item).
15026 
15027         In other cases, get the "head" item, which is either first of the
15028         equals on top level, or the constant.
15029       */
15030       Item *head_item= (!item_const && current_sjm &&
15031                         current_sjm_head != field_item) ? current_sjm_head: head;
15032       Item *head_real_item=  head_item->real_item();
15033       if (head_real_item->type() == Item::FIELD_ITEM)
15034         head_item= head_real_item;
15035 
15036       eq_item= new (thd->mem_root) Item_func_eq(thd, field_item->real_item(), head_item);
15037 
15038       if (!eq_item || eq_item->set_cmp_func())
15039         return 0;
15040       eq_item->quick_fix_field();
15041     }
15042     current_sjm= field_sjm;
15043   }
15044 
15045   /*
15046     We have produced zero, one, or more pair-wise equalities eq_i. We want to
15047     return an expression in form:
15048 
15049       cond AND eq_1 AND eq_2 AND eq_3 AND ...
15050 
15051     'cond' is a parameter for this function, which may be NULL, an Item_int(1),
15052     or an Item_func_eq or an Item_cond_and.
15053 
15054     We want to return a well-formed condition: no nested Item_cond_and objects,
15055     or Item_cond_and with a single child:
15056     - if 'cond' is an Item_cond_and, we add eq_i as its tail
15057     - if 'cond' is Item_int(1), we return eq_i
15058     - otherwise, we create our own Item_cond_and and put 'cond' at the front of
15059       it.
15060     - if we have only one condition to return, we don't create an Item_cond_and
15061   */
15062 
15063   if (eq_item && eq_list.push_back(eq_item, thd->mem_root))
15064     return 0;
15065   COND *res= 0;
15066   switch (eq_list.elements)
15067   {
15068   case 0:
15069     res= cond ? cond : new (thd->mem_root) Item_int(thd, (longlong) 1, 1);
15070     break;
15071   case 1:
15072     if (!cond || cond->type() ==  Item::INT_ITEM)
15073       res= eq_item;
15074     break;
15075   default:
15076     break;
15077   }
15078   if (!res)
15079   {
15080     if (cond)
15081     {
15082       if (cond->type() == Item::COND_ITEM)
15083       {
15084         res= cond;
15085         ((Item_cond *) res)->add_at_end(&eq_list);
15086       }
15087       else if (eq_list.push_front(cond, thd->mem_root))
15088         return 0;
15089     }
15090   }
15091   if (!res)
15092     res= new (thd->mem_root) Item_cond_and(thd, eq_list);
15093   if (res)
15094   {
15095     res->quick_fix_field();
15096     res->update_used_tables();
15097   }
15098 
15099   return res;
15100 }
15101 
15102 
15103 /**
15104   Substitute every field reference in a condition by the best equal field
15105   and eliminate all multiple equality predicates.
15106 
15107     The function retrieves the cond condition and for each encountered
15108     multiple equality predicate it sorts the field references in it
15109     according to the order of tables specified by the table_join_idx
15110     parameter. Then it eliminates the multiple equality predicate it
15111     replacing it by the conjunction of simple equality predicates
15112     equating every field from the multiple equality to the first
15113     field in it, or to the constant, if there is any.
15114     After this the function retrieves all other conjuncted
15115     predicates substitute every field reference by the field reference
15116     to the first equal field or equal constant if there are any.
15117 
15118   @param context_tab     Join tab that 'cond' will be attached to, or
15119                          NO_PARTICULAR_TAB. See notes above.
15120   @param cond            condition to process
15121   @param cond_equal      multiple equalities to take into consideration
15122   @param table_join_idx  index to tables determining field preference
15123 
15124   @note
15125     At the first glance full sort of fields in multiple equality
15126     seems to be an overkill. Yet it's not the case due to possible
15127     new fields in multiple equality item of lower levels. We want
15128     the order in them to comply with the order of upper levels.
15129 
15130     context_tab may be used to specify which join tab `cond` will be
15131     attached to. There are two possible cases:
15132 
15133     1. context_tab != NO_PARTICULAR_TAB
15134        We're doing substitution for an Item which will be evaluated in the
15135        context of a particular item. For example, if the optimizer does a
15136        ref access on "tbl1.key= expr" then
        = equality substitution will be performed on 'expr'
        = it is known in advance that 'expr' will be evaluated when
          table tbl1 is accessed.
       Note that in this kind of substitution we never have to replace Item_equal
       objects. For example, for

        t.key= func(col1=col2 AND col2=const)

       we will not build Item_equal or do equality substitution (if we decide to,
       this function will need to be fixed to handle it)
15147 
15148     2. context_tab == NO_PARTICULAR_TAB
15149        We're doing substitution in WHERE/ON condition, which is not yet
15150        attached to any particular join_tab. We will use information about the
       chosen join order to make "optimal" substitutions, i.e. those that allow
15152        to apply filtering as soon as possible. See eliminate_item_equal() and
15153        Item_equal::get_first() for details.
15154 
15155   @return
15156     The transformed condition, or NULL in case of error
15157 */
15158 
static COND* substitute_for_best_equal_field(THD *thd, JOIN_TAB *context_tab,
                                             COND *cond,
                                             COND_EQUAL *cond_equal,
                                             void *table_join_idx)
{
  Item_equal *item_equal;
  COND *org_cond= cond;                 // Return this in case of fatal error

  if (cond->type() == Item::COND_ITEM)
  {
    /* AND/OR node: its arguments are processed recursively below */
    List<Item> *cond_list= ((Item_cond*) cond)->argument_list();

    bool and_level= ((Item_cond*) cond)->functype() ==
                      Item_func::COND_AND_FUNC;
    if (and_level)
    {
      /* From here on work with the multiple equalities kept in this AND */
      cond_equal= &((Item_cond_and *) cond)->m_cond_equal;
      cond_list->disjoin((List<Item> *) &cond_equal->current_level);/* remove Item_equal objects from the AND. */

      /*
        Sort the members of every multiple equality of this level with
        compare_fields_by_table_order, passing table_join_idx as its argument.
      */
      List_iterator_fast<Item_equal> it(cond_equal->current_level);
      while ((item_equal= it++))
      {
        item_equal->sort(&compare_fields_by_table_order, table_join_idx);
      }
    }

    /*
      Run the substitution on each remaining argument and put the result
      in place of the argument when a different item was returned.
    */
    List_iterator<Item> li(*cond_list);
    Item *item;
    while ((item= li++))
    {
      Item *new_item= substitute_for_best_equal_field(thd, context_tab,
                                                      item, cond_equal,
                                                      table_join_idx);
      /*
        This works OK with PS/SP re-execution as changes are made to
        the arguments of AND/OR items only
      */
      if (new_item && new_item != item)
        li.replace(new_item);
    }

    if (and_level)
    {
      /*
        Turn the multiple equalities of this level into a condition:
        eq_cond is threaded through eliminate_item_equal() so that each call
        extends what the previous calls have built.
      */
      COND *eq_cond= 0;
      List_iterator_fast<Item_equal> it(cond_equal->current_level);
      bool false_eq_cond= FALSE;
      while ((item_equal= it++))
      {
        eq_cond= eliminate_item_equal(thd, eq_cond, cond_equal->upper_levels,
                                      item_equal);
        if (!eq_cond)
	{
          /* eliminate_item_equal() failed; handled by the (!eq_cond) block */
          eq_cond= 0;
          break;
        }
        else if (eq_cond->type() == Item::INT_ITEM && !eq_cond->val_bool())
	{
          /*
            This occurs when eliminate_item_equal() finds that cond is
            always false and substitutes it with Item_int 0.
            Due to this, value of item_equal will be 0, so just return it.
	  */
          cond= eq_cond;
          false_eq_cond= TRUE;
          break;
        }
      }
      if (eq_cond && !false_eq_cond)
      {
        /* Insert the generated equalities before all other conditions */
        if (eq_cond->type() == Item::COND_ITEM)
          ((Item_cond *) cond)->add_at_head(
                                  ((Item_cond *) eq_cond)->argument_list());
        else
	{
          /* With no other arguments left, eq_cond replaces the whole AND */
          if (cond_list->is_empty())
            cond= eq_cond;
          else
	  {
             /* Do not add an equality condition if it's always true */
             /* A failed push_front() is reported through eq_cond == 0 */
             if (eq_cond->type() != Item::INT_ITEM &&
                 cond_list->push_front(eq_cond, thd->mem_root))
               eq_cond= 0;
          }
	}
      }
      if (!eq_cond)
      {
        /*
          We are out of memory doing the transformation.
          This is a fatal error now. However we bail out by returning the
          original condition that we had before we started the transformation.
          The Item_equal objects disjoined above are appended back to the
          argument list for that purpose.
	*/
	cond_list->append((List<Item> *) &cond_equal->current_level);
      }
    }
  }
  else if (cond->type() == Item::FUNC_ITEM &&
           ((Item_func*) cond)->functype() == Item_func::MULT_EQUAL_FUNC)
  {
    /*
      cond itself is a multiple equality: sort its members and convert it
      with eliminate_item_equal(), starting from an empty condition.
    */
    item_equal= (Item_equal *) cond;
    item_equal->sort(&compare_fields_by_table_order, table_join_idx);
    cond_equal= item_equal->upper_levels;
    /*
      If the first multiple equality of that level is item_equal itself,
      move one more level up.
    */
    if (cond_equal && cond_equal->current_level.head() == item_equal)
      cond_equal= cond_equal->upper_levels;
    cond= eliminate_item_equal(thd, 0, cond_equal, item_equal);
    /* On failure fall back to the condition we were called with */
    return cond ? cond : org_cond;
  }
  else
  {
    /*
      Any other item: apply Item::replace_equal_field to it for every
      multiple equality of the current level and of all upper levels.
      A NULL result of transform() is returned to the caller as an error.
    */
    while (cond_equal)
    {
      List_iterator_fast<Item_equal> it(cond_equal->current_level);
      while((item_equal= it++))
      {
        REPLACE_EQUAL_FIELD_ARG arg= {item_equal, context_tab};
        if (!(cond= cond->transform(thd, &Item::replace_equal_field,
                                    (uchar *) &arg)))
          return 0;
      }
      cond_equal= cond_equal->upper_levels;
    }
  }
  return cond;
}
15284 
15285 
15286 /**
15287   Check appearance of new constant items in multiple equalities
15288   of a condition after reading a constant table.
15289 
15290     The function retrieves the cond condition and for each encountered
15291     multiple equality checks whether new constants have appeared after
15292     reading the constant (single row) table tab. If so it adjusts
15293     the multiple equality appropriately.
15294 
15295   @param cond       condition whose multiple equalities are to be checked
15296   @param table      constant table that has been read
15297   @param const_key  mark key parts as constant
15298 */
15299 
update_const_equal_items(THD * thd,COND * cond,JOIN_TAB * tab,bool const_key)15300 static void update_const_equal_items(THD *thd, COND *cond, JOIN_TAB *tab,
15301                                      bool const_key)
15302 {
15303   if (!(cond->used_tables() & tab->table->map))
15304     return;
15305 
15306   if (cond->type() == Item::COND_ITEM)
15307   {
15308     List<Item> *cond_list= ((Item_cond*) cond)->argument_list();
15309     List_iterator_fast<Item> li(*cond_list);
15310     Item *item;
15311     while ((item= li++))
15312       update_const_equal_items(thd, item, tab,
15313                                (((Item_cond*) cond)->top_level() &&
15314                                 ((Item_cond*) cond)->functype() ==
15315                                 Item_func::COND_AND_FUNC));
15316   }
15317   else if (cond->type() == Item::FUNC_ITEM &&
15318            ((Item_cond*) cond)->functype() == Item_func::MULT_EQUAL_FUNC)
15319   {
15320     Item_equal *item_equal= (Item_equal *) cond;
15321     bool contained_const= item_equal->get_const() != NULL;
15322     item_equal->update_const(thd);
15323     if (!contained_const && item_equal->get_const())
15324     {
15325       /* Update keys for range analysis */
15326       Item_equal_fields_iterator it(*item_equal);
15327       while (it++)
15328       {
15329         Field *field= it.get_curr_field();
15330         JOIN_TAB *stat= field->table->reginfo.join_tab;
15331         key_map possible_keys= field->key_start;
15332         possible_keys.intersect(field->table->keys_in_use_for_query);
15333         stat[0].const_keys.merge(possible_keys);
15334 
15335         /*
15336           For each field in the multiple equality (for which we know that it
15337           is a constant) we have to find its corresponding key part, and set
15338           that key part in const_key_parts.
15339         */
15340         if (!possible_keys.is_clear_all())
15341         {
15342           TABLE *field_tab= field->table;
15343           KEYUSE *use;
15344           for (use= stat->keyuse; use && use->table == field_tab; use++)
15345             if (const_key &&
15346                 !use->is_for_hash_join() && possible_keys.is_set(use->key) &&
15347                 field_tab->key_info[use->key].key_part[use->keypart].field ==
15348                 field)
15349               field_tab->const_key_parts[use->key]|= use->keypart_map;
15350         }
15351       }
15352     }
15353   }
15354 }
15355 
15356 
15357 /**
15358   Check if
15359     WHERE expr=value AND expr=const
15360   can be rewritten as:
15361     WHERE const=value AND expr=const
15362 
15363   @param target       - the target operator whose "expr" argument will be
15364                         replaced to "const".
15365   @param target_expr  - the target's "expr" which will be replaced to "const".
15366   @param target_value - the target's second argument, it will remain unchanged.
15367   @param source       - the equality expression ("=" or "<=>") that
15368                         can be used to rewrite the "target" part
15369                         (under certain conditions, see the code).
15370   @param source_expr  - the source's "expr". It should be exactly equal to
15371                         the target's "expr" to make condition rewrite possible.
15372   @param source_const - the source's "const" argument, it will be inserted
15373                         into "target" instead of "expr".
15374 */
15375 static bool
can_change_cond_ref_to_const(Item_bool_func2 * target,Item * target_expr,Item * target_value,Item_bool_func2 * source,Item * source_expr,Item * source_const)15376 can_change_cond_ref_to_const(Item_bool_func2 *target,
15377                              Item *target_expr, Item *target_value,
15378                              Item_bool_func2 *source,
15379                              Item *source_expr, Item *source_const)
15380 {
15381   return target_expr->eq(source_expr,0) &&
15382          target_value != source_const &&
15383          target->compare_type_handler()->
15384            can_change_cond_ref_to_const(target, target_expr, target_value,
15385                                         source, source_expr, source_const);
15386 }
15387 
15388 
/**
  Change field = field to field = const for each found field = const in the
  and_level.

  AND/OR items are descended recursively. In any other item handled here,
  an argument that can be replaced according to
  can_change_cond_ref_to_const() is replaced by a clone of "value".

  @param thd                thread handle
  @param save_list          list that receives a COND_CMP for every "=" or
                            "<=>" predicate that got a constant argument here
                            while its other argument is not constant
  @param and_father         for an argument of an AND item: that AND item;
                            for an argument of any other AND/OR item:
                            the argument itself
  @param cond               the condition to process
  @param field_value_owner  the equality that "field" and "value" come from
  @param field              the expression to be replaced
  @param value              the constant to replace it with
*/

static void
change_cond_ref_to_const(THD *thd, I_List<COND_CMP> *save_list,
                         Item *and_father, Item *cond,
                         Item_bool_func2 *field_value_owner,
                         Item *field, Item *value)
{
  if (cond->type() == Item::COND_ITEM)
  {
    /* Process every argument; only arguments of an AND get cond as father */
    bool and_level= ((Item_cond*) cond)->functype() ==
      Item_func::COND_AND_FUNC;
    List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
    Item *item;
    while ((item=li++))
      change_cond_ref_to_const(thd, save_list,and_level ? cond : item, item,
			       field_value_owner, field, value);
    return;
  }
  if (cond->eq_cmp_result() == Item::COND_OK)
    return;					// Not a boolean function

  Item_bool_func2 *func=  (Item_bool_func2*) cond;
  Item **args= func->arguments();
  Item *left_item=  args[0];
  Item *right_item= args[1];
  Item_func::Functype functype=  func->functype();

  /* First try to replace the right argument, then the left one */
  if (can_change_cond_ref_to_const(func, right_item, left_item,
                                   field_value_owner, field, value))
  {
    Item *tmp=value->clone_item(thd);
    if (tmp)
    {
      /* The clone takes over the collation of the item it replaces */
      tmp->collation.set(right_item->collation);
      thd->change_item_tree(args + 1, tmp);
      func->update_used_tables();
      /*
        An equality inside an AND (and_father != cond) that now has the form
        "non-constant = constant" is marked and saved in save_list.
      */
      if ((functype == Item_func::EQ_FUNC || functype == Item_func::EQUAL_FUNC)
	  && and_father != cond && !left_item->const_item())
      {
	cond->marker=1;
	COND_CMP *tmp2;
        /* Will work, even if malloc would fail */
        if ((tmp2= new (thd->mem_root) COND_CMP(and_father, func)))
	  save_list->push_back(tmp2);
      }
      /*
        LIKE can be optimized for BINARY/VARBINARY/BLOB columns, e.g.:

        from: WHERE CONCAT(c1)='const1' AND CONCAT(c1) LIKE 'const2'
          to: WHERE CONCAT(c1)='const1' AND 'const1' LIKE 'const2'

        So make sure to use set_cmp_func() only for non-LIKE operators.
      */
      if (functype != Item_func::LIKE_FUNC)
        ((Item_bool_rowready_func2*) func)->set_cmp_func();
    }
  }
  else if (can_change_cond_ref_to_const(func, left_item, right_item,
                                        field_value_owner, field, value))
  {
    Item *tmp= value->clone_item(thd);
    if (tmp)
    {
      /* The clone takes over the collation of the item it replaces */
      tmp->collation.set(left_item->collation);
      thd->change_item_tree(args, tmp);
      value= tmp;
      func->update_used_tables();
      if ((functype == Item_func::EQ_FUNC || functype == Item_func::EQUAL_FUNC)
	  && and_father != cond && !right_item->const_item())
      {
        /*
          Swap the arguments so that the constant ends up in args[1],
          the same layout as produced by the branch above.
        */
        args[0]= args[1];                       // For easy check
        thd->change_item_tree(args + 1, value);
	cond->marker=1;
	COND_CMP *tmp2;
        /* Will work, even if malloc would fail */
        if ((tmp2=new (thd->mem_root) COND_CMP(and_father, func)))
	  save_list->push_back(tmp2);
      }
      /* As above: set_cmp_func() is used only for non-LIKE operators */
      if (functype != Item_func::LIKE_FUNC)
        ((Item_bool_rowready_func2*) func)->set_cmp_func();
    }
  }
}
15476 
15477 
15478 static void
propagate_cond_constants(THD * thd,I_List<COND_CMP> * save_list,COND * and_father,COND * cond)15479 propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list,
15480                          COND *and_father, COND *cond)
15481 {
15482   if (cond->type() == Item::COND_ITEM)
15483   {
15484     bool and_level= ((Item_cond*) cond)->functype() ==
15485       Item_func::COND_AND_FUNC;
15486     List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
15487     Item *item;
15488     I_List<COND_CMP> save;
15489     while ((item=li++))
15490     {
15491       propagate_cond_constants(thd, &save,and_level ? cond : item, item);
15492     }
15493     if (and_level)
15494     {						// Handle other found items
15495       I_List_iterator<COND_CMP> cond_itr(save);
15496       COND_CMP *cond_cmp;
15497       while ((cond_cmp=cond_itr++))
15498       {
15499         Item **args= cond_cmp->cmp_func->arguments();
15500         if (!args[0]->const_item())
15501           change_cond_ref_to_const(thd, &save,cond_cmp->and_level,
15502                                    cond_cmp->and_level,
15503                                    cond_cmp->cmp_func, args[0], args[1]);
15504       }
15505     }
15506   }
15507   else if (and_father != cond && !cond->marker)		// In a AND group
15508   {
15509     if (cond->type() == Item::FUNC_ITEM &&
15510 	(((Item_func*) cond)->functype() == Item_func::EQ_FUNC ||
15511 	 ((Item_func*) cond)->functype() == Item_func::EQUAL_FUNC))
15512     {
15513       Item_func_eq *func=(Item_func_eq*) cond;
15514       Item **args= func->arguments();
15515       bool left_const= args[0]->const_item() && !args[0]->is_expensive();
15516       bool right_const= args[1]->const_item() && !args[1]->is_expensive();
15517       if (!(left_const && right_const) &&
15518           args[0]->cmp_type() == args[1]->cmp_type())
15519       {
15520 	if (right_const)
15521 	{
15522           resolve_const_item(thd, &args[1], args[0]);
15523 	  func->update_used_tables();
15524           change_cond_ref_to_const(thd, save_list, and_father, and_father,
15525                                    func, args[0], args[1]);
15526 	}
15527 	else if (left_const)
15528 	{
15529           resolve_const_item(thd, &args[0], args[1]);
15530 	  func->update_used_tables();
15531           change_cond_ref_to_const(thd, save_list, and_father, and_father,
15532                                    func, args[1], args[0]);
15533 	}
15534       }
15535     }
15536   }
15537 }
15538 
15539 /**
15540   Simplify joins replacing outer joins by inner joins whenever it's
15541   possible.
15542 
15543     The function, during a retrieval of join_list,  eliminates those
15544     outer joins that can be converted into inner join, possibly nested.
15545     It also moves the on expressions for the converted outer joins
15546     and from inner joins to conds.
15547     The function also calculates some attributes for nested joins:
15548     - used_tables
15549     - not_null_tables
15550     - dep_tables.
15551     - on_expr_dep_tables
    The first two attributes are used to test whether an outer join can
    be replaced by an inner join. The third attribute represents the
    relation 'to be dependent on' for tables. If table t2 is dependent
    on table t1, then in any evaluated execution plan table access to
    table t1 must precede access to table t2. This relation is also used
    to check whether the query contains invalid cross-references.
    The fourth attribute is an auxiliary one and is used to calculate
    dep_tables.
    As the attribute dep_tables qualifies possible orders of tables in the
    execution plan, the dependencies required by the straight join
    modifiers are reflected in this attribute as well.
15563     The function also removes all braces that can be removed from the join
15564     expression without changing its meaning.
15565 
15566   @note
15567     An outer join can be replaced by an inner join if the where condition
15568     or the on expression for an embedding nested join contains a conjunctive
15569     predicate rejecting null values for some attribute of the inner tables.
15570 
15571     E.g. in the query:
15572     @code
15573       SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a WHERE t2.b < 5
15574     @endcode
15575     the predicate t2.b < 5 rejects nulls.
15576     The query is converted first to:
15577     @code
15578       SELECT * FROM t1 INNER JOIN t2 ON t2.a=t1.a WHERE t2.b < 5
15579     @endcode
15580     then to the equivalent form:
15581     @code
      SELECT * FROM t1, t2 WHERE t2.b < 5 AND t2.a=t1.a
15583     @endcode
15584 
15585 
15586     Similarly the following query:
15587     @code
      SELECT * from t1 LEFT JOIN (t2, t3) ON t2.a=t1.a AND t3.b=t1.b
15589         WHERE t2.c < 5
15590     @endcode
15591     is converted to:
15592     @code
      SELECT * FROM t1, (t2, t3) WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b
15594 
15595     @endcode
15596 
15597     One conversion might trigger another:
15598     @code
15599       SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a
15600                        LEFT JOIN t3 ON t3.b=t2.b
15601         WHERE t3 IS NOT NULL =>
15602       SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a, t3
15603         WHERE t3 IS NOT NULL AND t3.b=t2.b =>
15604       SELECT * FROM t1, t2, t3
15605         WHERE t3 IS NOT NULL AND t3.b=t2.b AND t2.a=t1.a
15606   @endcode
15607 
15608     The function removes all unnecessary braces from the expression
15609     produced by the conversions.
15610     E.g.
15611     @code
15612       SELECT * FROM t1, (t2, t3) WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b
15613     @endcode
15614     finally is converted to:
15615     @code
15616       SELECT * FROM t1, t2, t3 WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b
15617 
15618     @endcode
15619 
15620 
15621     It also will remove braces from the following queries:
15622     @code
15623       SELECT * from (t1 LEFT JOIN t2 ON t2.a=t1.a) LEFT JOIN t3 ON t3.b=t2.b
15624       SELECT * from (t1, (t2,t3)) WHERE t1.a=t2.a AND t2.b=t3.b.
15625     @endcode
15626 
15627     The benefit of this simplification procedure is that it might return
15628     a query for which the optimizer can evaluate execution plan with more
15629     join orders. With a left join operation the optimizer does not
15630     consider any plan where one of the inner tables is before some of outer
15631     tables.
15632 
15633   IMPLEMENTATION
15634     The function is implemented by a recursive procedure.  On the recursive
15635     ascent all attributes are calculated, all outer joins that can be
15636     converted are replaced and then all unnecessary braces are removed.
15637     As join list contains join tables in the reverse order sequential
15638     elimination of outer joins does not require extra recursive calls.
15639 
15640   SEMI-JOIN NOTES
    Remove all semi-joins that are within another semi-join (i.e. have
    an "ancestor" semi-join nest)
15643 
15644   EXAMPLES
15645     Here is an example of a join query with invalid cross references:
15646     @code
15647       SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t3.a LEFT JOIN t3 ON t3.b=t1.b
15648     @endcode
15649 
15650   @param join        reference to the query info
15651   @param join_list   list representation of the join to be converted
15652   @param conds       conditions to add on expressions for converted joins
15653   @param top         true <=> conds is the where condition
15654   @param in_sj       TRUE <=> processing semi-join nest's children
15655   @return
15656     - The new condition, if success
15657     - 0, otherwise
15658 */
15659 
static COND *
simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top,
               bool in_sj)
{
  TABLE_LIST *table;
  NESTED_JOIN *nested_join;
  TABLE_LIST *prev_table= 0;
  List_iterator<TABLE_LIST> li(*join_list);
  /* TRUE when SELECT_STRAIGHT_JOIN is set in the options of the select */
  bool straight_join= MY_TEST(join->select_options & SELECT_STRAIGHT_JOIN);
  DBUG_ENTER("simplify_joins");

  /*
    Try to simplify join operations from join_list.
    The most outer join operation is checked for conversion first.
  */
  while ((table= li++))
  {
    table_map used_tables;
    table_map not_null_tables= (table_map) 0;

    if ((nested_join= table->nested_join))
    {
      /*
         If the element of join_list is a nested join apply
         the procedure to its nested join list first.
      */
      if (table->on_expr)
      {
        Item *expr= table->on_expr;
        /*
           If an on expression E is attached to the table,
           check all null rejected predicates in this expression.
           If such a predicate over an attribute belonging to
           an inner table of an embedded outer join is found,
           the outer join is converted to an inner join and
           the corresponding on expression is added to E.
	*/
        expr= simplify_joins(join, &nested_join->join_list,
                             expr, FALSE, in_sj || table->sj_on_expr);

        /*
          Store the result (and a copy of its AND/OR structure in
          prep_on_expr) if the expression has changed or if there is no
          prep_on_expr yet.
        */
        if (!table->prep_on_expr || expr != table->on_expr)
        {
          DBUG_ASSERT(expr);

          table->on_expr= expr;
          table->prep_on_expr= expr->copy_andor_structure(join->thd);
        }
      }
      /*
        Reset the attributes of the nest before the pass with conds: every
        table processed by that call ORs its maps into the nested_join of
        its embedding (see below).
      */
      nested_join->used_tables= (table_map) 0;
      nested_join->not_null_tables=(table_map) 0;
      conds= simplify_joins(join, &nested_join->join_list, conds, top,
                            in_sj || table->sj_on_expr);
      used_tables= nested_join->used_tables;
      not_null_tables= nested_join->not_null_tables;
      /* The following two might become unequal after table elimination: */
      nested_join->n_tables= nested_join->join_list.elements;
    }
    else
    {
      /* Not a nest: the attributes come from the table itself and conds */
      if (!table->prep_on_expr)
        table->prep_on_expr= table->on_expr;
      used_tables= table->get_map();
      if (conds)
        not_null_tables= conds->not_null_tables();
    }

    /* Accumulate the attributes in the nest this table belongs to */
    if (table->embedding)
    {
      table->embedding->nested_join->used_tables|= used_tables;
      table->embedding->nested_join->not_null_tables|= not_null_tables;
    }

    /*
      The branch is taken for tables that are not inner tables of a
      LEFT/RIGHT join as well as for outer joins that can be converted.
    */
    if (!(table->outer_join & (JOIN_TYPE_LEFT | JOIN_TYPE_RIGHT)) ||
        (used_tables & not_null_tables))
    {
      /*
        For some of the inner tables there are conjunctive predicates
        that reject nulls => the outer join can be replaced by an inner join.
      */
      if (table->outer_join && !table->embedding && table->table)
        table->table->maybe_null= FALSE;
      table->outer_join= 0;
      if (!(straight_join || table->straight))
      {
        /*
          Drop the dependencies of the table. If some embedding nest starts
          with an outer join element, the dependencies of the closest such
          nest are taken over, unless that nest has sj_subq_pred set.
        */
        table->dep_tables= 0;
        TABLE_LIST *embedding= table->embedding;
        while (embedding)
        {
          if (embedding->nested_join->join_list.head()->outer_join)
          {
            if (!embedding->sj_subq_pred)
              table->dep_tables= embedding->dep_tables;
            break;
          }
          embedding= embedding->embedding;
        }
      }
      if (table->on_expr)
      {
        /* Add ON expression to the WHERE or upper-level ON condition. */
        if (conds)
        {
          conds= and_conds(join->thd, conds, table->on_expr);
          conds->top_level_item();
          /* conds is always a new item as both cond and on_expr existed */
          DBUG_ASSERT(!conds->fixed);
          conds->fix_fields(join->thd, &conds);
        }
        else
          conds= table->on_expr;
        /* The ON expression has been moved, detach it from the table */
        table->prep_on_expr= table->on_expr= 0;
      }
    }

    /*
      Only inner tables of non-convertible outer joins
      remain with on_expr.
    */
    if (table->on_expr)
    {
      /*
        The table depends on the tables used in its ON expression, except
        for the tables of its own nest (or itself, if it is not in a nest).
      */
      table_map table_on_expr_used_tables= table->on_expr->used_tables();
      table->dep_tables|= table_on_expr_used_tables;
      if (table->embedding)
      {
        table->dep_tables&= ~table->embedding->nested_join->used_tables;
        /*
           Embedding table depends on tables used
           in embedded on expressions.
        */
        table->embedding->on_expr_dep_tables|= table_on_expr_used_tables;
      }
      else
        table->dep_tables&= ~table->get_map();
    }

    if (prev_table)
    {
      /* The order of tables is reverse: prev_table follows table */
      if (prev_table->straight || straight_join)
        prev_table->dep_tables|= used_tables;
      if (prev_table->on_expr)
      {
        prev_table->dep_tables|= table->on_expr_dep_tables;
        table_map prev_used_tables= prev_table->nested_join ?
	                            prev_table->nested_join->used_tables :
	                            prev_table->get_map();
        /*
          If on expression contains only references to inner tables
          we still make the inner tables dependent on the outer tables.
          It would be enough to set dependency only on one outer table
          for them. Yet this is really a rare case.
          Note:
          RAND_TABLE_BIT mask should not be counted as it
          prevents update of inner table dependences.
          For example it might happen if RAND() function
          is used in JOIN ON clause.
	*/
        if (!((prev_table->on_expr->used_tables() &
               ~(OUTER_REF_TABLE_BIT | RAND_TABLE_BIT)) &
              ~prev_used_tables))
          prev_table->dep_tables|= used_tables;
      }
    }
    prev_table= table;
  }

  /*
    Flatten nested joins that can be flattened.
    no ON expression and not a semi-join => can be flattened.
  */
  li.rewind();
  while ((table= li++))
  {
    nested_join= table->nested_join;
    if (table->sj_on_expr && !in_sj)
    {
      /*
        If this is a semi-join that is not contained within another semi-join
        leave it intact (otherwise it is flattened)
      */
      /*
        Make sure that any semi-join appear in
        the join->select_lex->sj_nests list only once
      */
      List_iterator_fast<TABLE_LIST> sj_it(join->select_lex->sj_nests);
      TABLE_LIST *sj_nest;
      while ((sj_nest= sj_it++))
      {
        if (table == sj_nest)
          break;
      }
      /* sj_nest is non-NULL only if the nest was found in the list */
      if (sj_nest)
        continue;
      join->select_lex->sj_nests.push_back(table, join->thd->mem_root);

      /*
        Also, walk through semi-join children and mark those that are now
        top-level
      */
      TABLE_LIST *tbl;
      List_iterator<TABLE_LIST> it(nested_join->join_list);
      while ((tbl= it++))
      {
        if (!tbl->on_expr && tbl->table)
          tbl->table->maybe_null= FALSE;
      }
    }
    else if (nested_join && !table->on_expr)
    {
      /*
        A nest without ON expression: attach its children to the level of
        the nest and put them into join_list in place of the nest.
      */
      TABLE_LIST *tbl;
      List_iterator<TABLE_LIST> it(nested_join->join_list);
      List<TABLE_LIST> repl_list;
      while ((tbl= it++))
      {
        tbl->embedding= table->embedding;
        if (!tbl->embedding && !tbl->on_expr && tbl->table)
          tbl->table->maybe_null= FALSE;
        tbl->join_list= table->join_list;
        repl_list.push_back(tbl, join->thd->mem_root);
        /* The children inherit the dependencies of the removed nest */
        tbl->dep_tables|= table->dep_tables;
      }
      li.replace(repl_list);
    }
  }
  DBUG_RETURN(conds);
}
15886 
15887 
15888 /**
15889   Assign each nested join structure a bit in nested_join_map.
15890 
15891     Assign each nested join structure (except ones that embed only one element
15892     and so are redundant) a bit in nested_join_map.
15893 
15894   @param join          Join being processed
15895   @param join_list     List of tables
15896   @param first_unused  Number of first unused bit in nested_join_map before the
15897                        call
15898 
15899   @note
15900     This function is called after simplify_joins(), when there are no
15901     redundant nested joins, #non_redundant_nested_joins <= #tables_in_join so
15902     we will not run out of bits in nested_join_map.
15903 
15904   @return
15905     First unused bit in nested_join_map after the call.
15906 */
15907 
build_bitmap_for_nested_joins(List<TABLE_LIST> * join_list,uint first_unused)15908 static uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list,
15909                                           uint first_unused)
15910 {
15911   List_iterator<TABLE_LIST> li(*join_list);
15912   TABLE_LIST *table;
15913   DBUG_ENTER("build_bitmap_for_nested_joins");
15914   while ((table= li++))
15915   {
15916     NESTED_JOIN *nested_join;
15917     if ((nested_join= table->nested_join))
15918     {
15919       /*
15920         It is guaranteed by simplify_joins() function that a nested join
15921         that has only one child represents a single table VIEW (and the child
15922         is an underlying table). We don't assign bits to such nested join
15923         structures because
15924         1. it is redundant (a "sequence" of one table cannot be interleaved
15925             with anything)
15926         2. we could run out bits in nested_join_map otherwise.
15927       */
15928       if (nested_join->n_tables != 1)
15929       {
15930         /* Don't assign bits to sj-nests */
15931         if (table->on_expr)
15932           nested_join->nj_map= (nested_join_map) 1 << first_unused++;
15933         first_unused= build_bitmap_for_nested_joins(&nested_join->join_list,
15934                                                     first_unused);
15935       }
15936     }
15937   }
15938   DBUG_RETURN(first_unused);
15939 }
15940 
15941 
15942 /**
15943   Set NESTED_JOIN::counter and n_tables in all nested joins in passed list.
15944 
15945   For all nested joins contained in the passed join_list (including its
15946   children), set:
15947    - nested_join->counter=0
15948    - nested_join->n_tables= {number of non-degenerate direct children}.
15949 
15950   Non-degenerate means non-const base table or a join nest that has a
15951   non-degenerate child.
15952 
15953   @param join_list  List of nested joins to process. It may also contain base
15954                     tables which will be ignored.
15955 */
15956 
reset_nj_counters(JOIN * join,List<TABLE_LIST> * join_list)15957 static uint reset_nj_counters(JOIN *join, List<TABLE_LIST> *join_list)
15958 {
15959   List_iterator<TABLE_LIST> li(*join_list);
15960   TABLE_LIST *table;
15961   DBUG_ENTER("reset_nj_counters");
15962   uint n=0;
15963   while ((table= li++))
15964   {
15965     NESTED_JOIN *nested_join;
15966     bool is_eliminated_nest= FALSE;
15967     if ((nested_join= table->nested_join))
15968     {
15969       nested_join->counter= 0;
15970       nested_join->n_tables= reset_nj_counters(join, &nested_join->join_list);
15971       if (!nested_join->n_tables)
15972         is_eliminated_nest= TRUE;
15973     }
15974     const table_map removed_tables= join->eliminated_tables |
15975                                     join->const_table_map;
15976 
15977     if ((table->nested_join && !is_eliminated_nest) ||
15978         (!table->nested_join && (table->table->map & ~removed_tables)))
15979       n++;
15980   }
15981   DBUG_RETURN(n);
15982 }
15983 
15984 
15985 /**
  Check interleaving with the inner tables of an outer join for an
  extension table.
15988 
15989     Check if table next_tab can be added to current partial join order, and
15990     if yes, record that it has been added.
15991 
15992     The function assumes that both current partial join order and its
15993     extension with next_tab are valid wrt table dependencies.
15994 
15995   @verbatim
15996      IMPLEMENTATION
15997        LIMITATIONS ON JOIN ORDER
15998          The nested [outer] joins executioner algorithm imposes these limitations
15999          on join order:
16000          1. "Outer tables first" -  any "outer" table must be before any
16001              corresponding "inner" table.
16002          2. "No interleaving" - tables inside a nested join must form a continuous
16003             sequence in join order (i.e. the sequence must not be interrupted by
16004             tables that are outside of this nested join).
16005 
16006          #1 is checked elsewhere, this function checks #2 provided that #1 has
16007          been already checked.
16008 
16009        WHY NEED NON-INTERLEAVING
16010          Consider an example:
16011 
16012            select * from t0 join t1 left join (t2 join t3) on cond1
16013 
16014          The join order "t1 t2 t0 t3" is invalid:
16015 
16016          table t0 is outside of the nested join, so WHERE condition for t0 is
16017          attached directly to t0 (without triggers, and it may be used to access
16018          t0). Applying WHERE(t0) to (t2,t0,t3) record is invalid as we may miss
16019          combinations of (t1, t2, t3) that satisfy condition cond1, and produce a
16020          null-complemented (t1, t2.NULLs, t3.NULLs) row, which should not have
16021          been produced.
16022 
16023          If table t0 is not between t2 and t3, the problem doesn't exist:
16024           If t0 is located after (t2,t3), WHERE(t0) is applied after nested join
16025            processing has finished.
16026           If t0 is located before (t2,t3), predicates like WHERE_cond(t0, t2) are
16027            wrapped into condition triggers, which takes care of correct nested
16028            join processing.
16029 
16030        HOW IT IS IMPLEMENTED
16031          The limitations on join order can be rephrased as follows: for valid
16032          join order one must be able to:
16033            1. write down the used tables in the join order on one line.
16034            2. for each nested join, put one '(' and one ')' on the said line
16035            3. write "LEFT JOIN" and "ON (...)" where appropriate
16036            4. get a query equivalent to the query we're trying to execute.
16037 
16038          Calls to check_interleaving_with_nj() are equivalent to writing the
16039          above described line from left to right.
         A single check_interleaving_with_nj(next_tab) call is equivalent to
         writing table next_tab and the appropriate brackets, given that
         last_tab and its brackets were the last thing written. Graphically
         the transition is as follows:
16044 
16045                               +---- current position
16046                               |
16047              ... last_tab ))) | ( next_tab )  )..) | ...
16048                                 X          Y   Z   |
16049                                                    +- need to move to this
16050                                                       position.
16051 
16052          Notes about the position:
           The caller guarantees that there is no more than one X-bracket by
16054            checking "!(remaining_tables & s->dependent)" before calling this
16055            function. X-bracket may have a pair in Y-bracket.
16056 
         When "writing" we store/update this auxiliary info about the current
         position:
          1. join->cur_embedding_map - bitmap of pairs of brackets (aka nested
             joins) we've opened but didn't close.
          2. {each NESTED_JOIN structure not simplified away}->counter - number
             of this nested join's children that have already been added to
             the partial join order.
16064   @endverbatim
16065 
16066   @param next_tab   Table we're going to extend the current partial join with
16067 
16068   @retval
16069     FALSE  Join order extended, nested joins info about current join
16070     order (see NOTE section) updated.
16071   @retval
16072     TRUE   Requested join order extension not allowed.
16073 */
16074 
check_interleaving_with_nj(JOIN_TAB * next_tab)16075 static bool check_interleaving_with_nj(JOIN_TAB *next_tab)
16076 {
16077   TABLE_LIST *next_emb= next_tab->table->pos_in_table_list->embedding;
16078   JOIN *join= next_tab->join;
16079 
16080   if (join->cur_embedding_map & ~next_tab->embedding_map)
16081   {
16082     /*
16083       next_tab is outside of the "pair of brackets" we're currently in.
16084       Cannot add it.
16085     */
16086     return TRUE;
16087   }
16088 
16089   /*
16090     Do update counters for "pairs of brackets" that we've left (marked as
16091     X,Y,Z in the above picture)
16092   */
16093   for (;next_emb && next_emb != join->emb_sjm_nest; next_emb= next_emb->embedding)
16094   {
16095     if (!next_emb->sj_on_expr)
16096     {
16097       next_emb->nested_join->counter++;
16098       if (next_emb->nested_join->counter == 1)
16099       {
16100         /*
16101           next_emb is the first table inside a nested join we've "entered". In
16102           the picture above, we're looking at the 'X' bracket. Don't exit yet as
16103           X bracket might have Y pair bracket.
16104         */
16105         join->cur_embedding_map |= next_emb->nested_join->nj_map;
16106       }
16107 
16108       if (next_emb->nested_join->n_tables !=
16109           next_emb->nested_join->counter)
16110         break;
16111 
16112       /*
16113         We're currently at Y or Z-bracket as depicted in the above picture.
16114         Mark that we've left it and continue walking up the brackets hierarchy.
16115       */
16116       join->cur_embedding_map &= ~next_emb->nested_join->nj_map;
16117     }
16118   }
16119   return FALSE;
16120 }
16121 
16122 
16123 /**
16124   Nested joins perspective: Remove the last table from the join order.
16125 
16126   The algorithm is the reciprocal of check_interleaving_with_nj(), hence
16127   parent join nest nodes are updated only when the last table in its child
16128   node is removed. The ASCII graphic below will clarify.
16129 
16130   %A table nesting such as <tt> t1 x [ ( t2 x t3 ) x ( t4 x t5 ) ] </tt>is
16131   represented by the below join nest tree.
16132 
16133   @verbatim
16134                      NJ1
16135                   _/ /  \
16136                 _/  /    NJ2
16137               _/   /     / \
16138              /    /     /   \
16139    t1 x [ (t2 x t3) x (t4 x t5) ]
16140   @endverbatim
16141 
16142   At the point in time when check_interleaving_with_nj() adds the table t5 to
16143   the query execution plan, QEP, it also directs the node named NJ2 to mark
16144   the table as covered. NJ2 does so by incrementing its @c counter
16145   member. Since all of NJ2's tables are now covered by the QEP, the algorithm
16146   proceeds up the tree to NJ1, incrementing its counter as well. All join
16147   nests are now completely covered by the QEP.
16148 
16149   restore_prev_nj_state() does the above in reverse. As seen above, the node
16150   NJ1 contains the nodes t2, t3, and NJ2. Its counter being equal to 3 means
16151   that the plan covers t2, t3, and NJ2, @e and that the sub-plan (t4 x t5)
16152   completely covers NJ2. The removal of t5 from the partial plan will first
16153   decrement NJ2's counter to 1. It will then detect that NJ2 went from being
16154   completely to partially covered, and hence the algorithm must continue
16155   upwards to NJ1 and decrement its counter to 2. %A subsequent removal of t4
16156   will however not influence NJ1 since it did not un-cover the last table in
16157   NJ2.
16158 
16159   SYNOPSIS
16160     restore_prev_nj_state()
16161       last  join table to remove, it is assumed to be the last in current
16162             partial join order.
16163 
16164   DESCRIPTION
16165 
16166     Remove the last table from the partial join order and update the nested
16167     joins counters and join->cur_embedding_map. It is ok to call this
16168     function for the first table in join order (for which
16169     check_interleaving_with_nj has not been called)
16170 
16171   @param last  join table to remove, it is assumed to be the last in current
16172                partial join order.
16173 */
16174 
restore_prev_nj_state(JOIN_TAB * last)16175 static void restore_prev_nj_state(JOIN_TAB *last)
16176 {
16177   TABLE_LIST *last_emb= last->table->pos_in_table_list->embedding;
16178   JOIN *join= last->join;
16179   for (;last_emb != NULL && last_emb != join->emb_sjm_nest;
16180        last_emb= last_emb->embedding)
16181   {
16182     if (!last_emb->sj_on_expr)
16183     {
16184       NESTED_JOIN *nest= last_emb->nested_join;
16185       DBUG_ASSERT(nest->counter > 0);
16186 
16187       bool was_fully_covered= nest->is_fully_covered();
16188 
16189       join->cur_embedding_map|= nest->nj_map;
16190 
16191       if (--nest->counter == 0)
16192         join->cur_embedding_map&= ~nest->nj_map;
16193 
16194       if (!was_fully_covered)
16195         break;
16196     }
16197   }
16198 }
16199 
16200 
16201 
16202 /*
16203   Change access methods not to use join buffering and adjust costs accordingly
16204 
16205   SYNOPSIS
16206     optimize_wo_join_buffering()
16207       join
16208       first_tab               The first tab to do re-optimization for
16209       last_tab                The last tab to do re-optimization for
16210       last_remaining_tables   Bitmap of tables that are not in the
16211                               [0...last_tab] join prefix
16212       first_alt               TRUE <=> Use the LooseScan plan for the first_tab
16213       no_jbuf_before          Don't allow to use join buffering before this
16214                               table
16215       reopt_rec_count     OUT New output record count
16216       reopt_cost          OUT New join prefix cost
16217 
16218   DESCRIPTION
16219     Given a join prefix [0; ... first_tab], change the access to the tables
16220     in the [first_tab; last_tab] not to use join buffering. This is needed
16221     because some semi-join strategies cannot be used together with the join
16222     buffering.
16223     In general case the best table order in [first_tab; last_tab] range with
16224     join buffering is different from the best order without join buffering but
16225     we don't try finding a better join order. (TODO ask Igor why did we
16226     chose not to do this in the end. that's actually the difference from the
16227     forking approach)
16228 */
16229 
optimize_wo_join_buffering(JOIN * join,uint first_tab,uint last_tab,table_map last_remaining_tables,bool first_alt,uint no_jbuf_before,double * outer_rec_count,double * reopt_cost)16230 void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab,
16231                                 table_map last_remaining_tables,
16232                                 bool first_alt, uint no_jbuf_before,
16233                                 double *outer_rec_count, double *reopt_cost)
16234 {
16235   double cost, rec_count;
16236   table_map reopt_remaining_tables= last_remaining_tables;
16237   uint i;
16238 
16239   if (first_tab > join->const_tables)
16240   {
16241     cost=      join->positions[first_tab - 1].prefix_cost.total_cost();
16242     rec_count= join->positions[first_tab - 1].prefix_record_count;
16243   }
16244   else
16245   {
16246     cost= 0.0;
16247     rec_count= 1;
16248   }
16249 
16250   *outer_rec_count= rec_count;
16251   for (i= first_tab; i <= last_tab; i++)
16252     reopt_remaining_tables |= join->positions[i].table->table->map;
16253 
16254   /*
16255     best_access_path() optimization depends on the value of
16256     join->cur_sj_inner_tables. Our goal in this function is to do a
16257     re-optimization with disabled join buffering, but no other changes.
16258     In order to achieve this, cur_sj_inner_tables needs have the same
16259     value it had during the original invocations of best_access_path.
16260 
16261     We know that this function, optimize_wo_join_buffering() is called to
16262     re-optimize semi-join join order range, which allows to conclude that
16263     the "original" value of cur_sj_inner_tables was 0.
16264   */
16265   table_map save_cur_sj_inner_tables= join->cur_sj_inner_tables;
16266   join->cur_sj_inner_tables= 0;
16267 
16268   for (i= first_tab; i <= last_tab; i++)
16269   {
16270     JOIN_TAB *rs= join->positions[i].table;
16271     POSITION pos, loose_scan_pos;
16272 
16273     if ((i == first_tab && first_alt) || join->positions[i].use_join_buffer)
16274     {
16275       /* Find the best access method that would not use join buffering */
16276       best_access_path(join, rs, reopt_remaining_tables,
16277                        join->positions, i,
16278                        TRUE, rec_count,
16279                        &pos, &loose_scan_pos);
16280     }
16281     else
16282       pos= join->positions[i];
16283 
16284     if ((i == first_tab && first_alt))
16285       pos= loose_scan_pos;
16286 
16287     reopt_remaining_tables &= ~rs->table->map;
16288     rec_count= COST_MULT(rec_count, pos.records_read);
16289     cost= COST_ADD(cost, pos.read_time);
16290     cost= COST_ADD(cost, rec_count / (double) TIME_FOR_COMPARE);
16291     //TODO: take into account join condition selectivity here
16292     double pushdown_cond_selectivity= 1.0;
16293     table_map real_table_bit= rs->table->map;
16294     if (join->thd->variables.optimizer_use_condition_selectivity > 1)
16295     {
16296       pushdown_cond_selectivity= table_cond_selectivity(join, i, rs,
16297                                                         reopt_remaining_tables &
16298                                                         ~real_table_bit);
16299     }
16300     (*outer_rec_count) *= pushdown_cond_selectivity;
16301     if (!rs->emb_sj_nest)
16302       *outer_rec_count= COST_MULT(*outer_rec_count, pos.records_read);
16303 
16304   }
16305   join->cur_sj_inner_tables= save_cur_sj_inner_tables;
16306 
16307   *reopt_cost= cost;
16308 }
16309 
16310 
16311 static COND *
optimize_cond(JOIN * join,COND * conds,List<TABLE_LIST> * join_list,bool ignore_on_conds,Item::cond_result * cond_value,COND_EQUAL ** cond_equal,int flags)16312 optimize_cond(JOIN *join, COND *conds,
16313               List<TABLE_LIST> *join_list, bool ignore_on_conds,
16314               Item::cond_result *cond_value, COND_EQUAL **cond_equal,
16315               int flags)
16316 {
16317   THD *thd= join->thd;
16318   DBUG_ENTER("optimize_cond");
16319 
16320   if (!conds)
16321   {
16322     *cond_value= Item::COND_TRUE;
16323     if (!ignore_on_conds)
16324       build_equal_items(join, NULL, NULL, join_list, ignore_on_conds,
16325                         cond_equal);
16326   }
16327   else
16328   {
16329     /*
16330       Build all multiple equality predicates and eliminate equality
16331       predicates that can be inferred from these multiple equalities.
16332       For each reference of a field included into a multiple equality
16333       that occurs in a function set a pointer to the multiple equality
16334       predicate. Substitute a constant instead of this field if the
16335       multiple equality contains a constant.
16336     */
16337     DBUG_EXECUTE("where", print_where(conds, "original", QT_ORDINARY););
16338     conds= build_equal_items(join, conds, NULL, join_list,
16339                              ignore_on_conds, cond_equal,
16340                              MY_TEST(flags & OPT_LINK_EQUAL_FIELDS));
16341     DBUG_EXECUTE("where",print_where(conds,"after equal_items", QT_ORDINARY););
16342 
16343     /* change field = field to field = const for each found field = const */
16344     propagate_cond_constants(thd, (I_List<COND_CMP> *) 0, conds, conds);
16345     /*
16346       Remove all instances of item == item
16347       Remove all and-levels where CONST item != CONST item
16348     */
16349     DBUG_EXECUTE("where",print_where(conds,"after const change", QT_ORDINARY););
16350     conds= conds->remove_eq_conds(thd, cond_value, true);
16351     if (conds && conds->type() == Item::COND_ITEM &&
16352         ((Item_cond*) conds)->functype() == Item_func::COND_AND_FUNC)
16353       *cond_equal= &((Item_cond_and*) conds)->m_cond_equal;
16354     DBUG_EXECUTE("info",print_where(conds,"after remove", QT_ORDINARY););
16355   }
16356   DBUG_RETURN(conds);
16357 }
16358 
16359 
16360 /**
16361   @brief
16362   Propagate multiple equalities to the sub-expressions of a condition
16363 
16364   @param thd             thread handle
16365   @param cond            the condition where equalities are to be propagated
16366   @param *new_equalities the multiple equalities to be propagated
16367   @param inherited        path to all inherited multiple equality items
16368   @param[out] is_simplifiable_cond   'cond' may be simplified after the
16369                                       propagation of the equalities
16370 
16371   @details
16372   The function recursively traverses the tree of the condition 'cond' and
16373   for each its AND sub-level of any depth the function merges the multiple
16374   equalities from the list 'new_equalities' into the multiple equalities
16375   attached to the AND item created for this sub-level.
16376   The function also [re]sets references to the equalities formed by the
16377   merges of multiple equalities in all field items occurred in 'cond'
16378   that are encountered in the equalities.
16379   If the result of any merge of multiple equalities is an impossible
16380   condition the function returns TRUE in the parameter is_simplifiable_cond.
16381 */
16382 
propagate_new_equalities(THD * thd,Item * cond,List<Item_equal> * new_equalities,COND_EQUAL * inherited,bool * is_simplifiable_cond)16383 void propagate_new_equalities(THD *thd, Item *cond,
16384                               List<Item_equal> *new_equalities,
16385                               COND_EQUAL *inherited,
16386                               bool *is_simplifiable_cond)
16387 {
16388   if (cond->type() == Item::COND_ITEM)
16389   {
16390     bool and_level= ((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC;
16391     if (and_level)
16392     {
16393       Item_cond_and *cond_and= (Item_cond_and *) cond;
16394       List<Item_equal> *cond_equalities= &cond_and->m_cond_equal.current_level;
16395       cond_and->m_cond_equal.upper_levels= inherited;
16396       if (!cond_equalities->is_empty() && cond_equalities != new_equalities)
16397       {
16398         Item_equal *equal_item;
16399         List_iterator<Item_equal> it(*new_equalities);
16400 	while ((equal_item= it++))
16401 	{
16402           equal_item->merge_into_list(thd, cond_equalities, true, true);
16403         }
16404         List_iterator<Item_equal> ei(*cond_equalities);
16405         while ((equal_item= ei++))
16406 	{
16407           if (equal_item->const_item() && !equal_item->val_int())
16408 	  {
16409             *is_simplifiable_cond= true;
16410             return;
16411           }
16412         }
16413       }
16414     }
16415 
16416     Item *item;
16417     List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
16418     while ((item= li++))
16419     {
16420       COND_EQUAL *new_inherited= and_level && item->type() == Item::COND_ITEM ?
16421                                    &((Item_cond_and *) cond)->m_cond_equal :
16422                                    inherited;
16423       propagate_new_equalities(thd, item, new_equalities, new_inherited,
16424                                is_simplifiable_cond);
16425     }
16426   }
16427   else if (cond->type() == Item::FUNC_ITEM &&
16428            ((Item_cond*) cond)->functype() == Item_func::MULT_EQUAL_FUNC)
16429   {
16430     Item_equal *equal_item;
16431     List_iterator<Item_equal> it(*new_equalities);
16432     Item_equal *equality= (Item_equal *) cond;
16433     equality->upper_levels= inherited;
16434     while ((equal_item= it++))
16435     {
16436       equality->merge_with_check(thd, equal_item, true);
16437     }
16438     if (equality->const_item() && !equality->val_int())
16439       *is_simplifiable_cond= true;
16440   }
16441   else
16442   {
16443     cond= cond->propagate_equal_fields(thd,
16444                                        Item::Context_boolean(), inherited);
16445     cond->update_used_tables();
16446   }
16447 }
16448 
16449 /*
16450   Check if cond_is_datetime_is_null() is true for the condition cond, or
16451   for any of its AND/OR-children
16452 */
cond_has_datetime_is_null(Item * cond)16453 bool cond_has_datetime_is_null(Item *cond)
16454 {
16455   if (cond_is_datetime_is_null(cond))
16456     return true;
16457 
16458   if (cond->type() == Item::COND_ITEM)
16459   {
16460     List<Item> *cond_arg_list= ((Item_cond*) cond)->argument_list();
16461     List_iterator<Item> li(*cond_arg_list);
16462     Item *item;
16463     while ((item= li++))
16464     {
16465       if (cond_has_datetime_is_null(item))
16466         return true;
16467     }
16468   }
16469   return false;
16470 }
16471 
16472 /*
16473   Check if passed condtition has for of
16474 
16475     not_null_date_col IS NULL
16476 
16477   where not_null_date_col has a datte or datetime type
16478 */
16479 
cond_is_datetime_is_null(Item * cond)16480 bool cond_is_datetime_is_null(Item *cond)
16481 {
16482   if (cond->type() == Item::FUNC_ITEM &&
16483       ((Item_func*) cond)->functype() == Item_func::ISNULL_FUNC)
16484   {
16485     return ((Item_func_isnull*) cond)->arg_is_datetime_notnull_field();
16486   }
16487   return false;
16488 }
16489 
16490 
16491 /**
16492   @brief
16493   Evaluate all constant boolean sub-expressions in a condition
16494 
16495   @param thd        thread handle
  @param cond       condition in which to evaluate constant sub-expressions
16497   @param[out] cond_value : the returned value of the condition
16498                            (TRUE/FALSE/UNKNOWN:
16499                            Item::COND_TRUE/Item::COND_FALSE/Item::COND_OK)
16500   @return
16501    the item that is the result of the substitution of all inexpensive constant
16502    boolean sub-expressions into cond, or,
16503    NULL if the condition is constant and is evaluated to FALSE.
16504 
16505   @details
16506   This function looks for all inexpensive constant boolean sub-expressions in
16507   the given condition 'cond' and substitutes them for their values.
16508   For example, the condition 2 > (5 + 1) or a < (10 / 2)
16509   will be transformed to the condition a < (10 / 2).
16510   Note that a constant sub-expression is evaluated only if it is constant and
16511   inexpensive. A sub-expression with an uncorrelated subquery may be evaluated
16512   only if the subquery is considered as inexpensive.
16513   The function does not evaluate a constant sub-expression if it is not on one
16514   of AND/OR levels of the condition 'cond'. For example, the subquery in the
16515   condition a > (select max(b) from t1 where b > 5) will never be evaluated
16516   by this function.
16517   If a constant boolean sub-expression is evaluated to TRUE then:
16518     - when the sub-expression is a conjunct of an AND formula it is simply
16519       removed from this formula
16520     - when the sub-expression is a disjunct of an OR formula the whole OR
16521       formula is converted to TRUE
16522   If a constant boolean sub-expression is evaluated to FALSE then:
16523     - when the sub-expression is a disjunct of an OR formula it is simply
16524       removed from this formula
    - when the sub-expression is a conjunct of an AND formula the whole AND
16526       formula is converted to FALSE
16527   When a disjunct/conjunct is removed from an OR/AND formula it might happen
16528   that there is only one conjunct/disjunct remaining. In this case this
16529   remaining disjunct/conjunct must be merged into underlying AND/OR formula,
16530   because AND/OR levels must alternate in the same way as they alternate
16531   after fix_fields() is called for the original condition.
16532   The specifics of merging a formula f into an AND formula A appears
16533   when A contains multiple equalities and f contains multiple equalities.
16534   In this case the multiple equalities from f and A have to be merged.
  After this the resulting multiple equalities have to be propagated into
  all the AND/OR levels of the formula A (see propagate_new_equalities()).
16537   The propagation of multiple equalities might result in forming multiple
16538   equalities that are always FALSE. This, in its turn, might trigger further
16539   simplification of the condition.
16540 
16541   @note
16542   EXAMPLE 1:
16543   SELECT * FROM t1 WHERE (b = 1 OR a = 1) AND (b = 5 AND a = 5 OR 1 != 1);
16544   First 1 != 1 will be removed from the second conjunct:
16545   => SELECT * FROM t1 WHERE (b = 1 OR a = 1) AND (b = 5 AND a = 5);
16546   Then (b = 5 AND a = 5) will be merged into the top level condition:
16547   => SELECT * FROM t1 WHERE (b = 1 OR a = 1) AND (b = 5) AND (a = 5);
  Then (b = 5), (a = 5)  will be propagated into the disjuncts of
16549   (b = 1 OR a = 1):
16550   => SELECT * FROM t1 WHERE ((b = 1) AND (b = 5) AND (a = 5) OR
16551                              (a = 1) AND (b = 5) AND (a = 5)) AND
16552                             (b = 5) AND (a = 5)
16553   => SELECT * FROM t1 WHERE ((FALSE AND (a = 5)) OR
16554                              (FALSE AND (b = 5))) AND
16555                              (b = 5) AND (a = 5)
16556   After this an additional call of remove_eq_conds() converts it
16557   to FALSE
16558 
16559   EXAMPLE 2:
16560   SELECT * FROM t1 WHERE (b = 1 OR a = 5) AND (b = 5 AND a = 5 OR 1 != 1);
16561   => SELECT * FROM t1 WHERE (b = 1 OR a = 5) AND (b = 5 AND a = 5);
16562   => SELECT * FROM t1 WHERE (b = 1 OR a = 5) AND (b = 5) AND (a = 5);
16563   => SELECT * FROM t1 WHERE ((b = 1) AND (b = 5) AND (a = 5) OR
16564                              (a = 5) AND (b = 5) AND (a = 5)) AND
16565                             (b = 5) AND (a = 5)
16566   => SELECT * FROM t1 WHERE ((FALSE AND (a = 5)) OR
16567                              ((b = 5) AND (a = 5))) AND
16568                              (b = 5) AND (a = 5)
16569   After this an additional call of  remove_eq_conds() converts it to
16570  =>  SELECT * FROM t1 WHERE (b = 5) AND (a = 5)
16571 */
16572 
16573 
16574 COND *
remove_eq_conds(THD * thd,Item::cond_result * cond_value,bool top_level_arg)16575 Item_cond::remove_eq_conds(THD *thd, Item::cond_result *cond_value,
16576                            bool top_level_arg)
16577 {
16578   bool and_level= functype() == Item_func::COND_AND_FUNC;
16579   List<Item> *cond_arg_list= argument_list();
16580 
16581   if (and_level)
16582   {
16583     /*
16584       Remove multiple equalities that became always true (e.g. after
16585       constant row substitution).
16586       They would be removed later in the function anyway, but the list of
16587       them cond_equal.current_level also  must be adjusted correspondingly.
16588       So it's easier  to do it at one pass through the list of the equalities.
16589     */
16590      List<Item_equal> *cond_equalities=
16591       &((Item_cond_and *) this)->m_cond_equal.current_level;
16592      cond_arg_list->disjoin((List<Item> *) cond_equalities);
16593      List_iterator<Item_equal> it(*cond_equalities);
16594      Item_equal *eq_item;
16595      while ((eq_item= it++))
16596      {
16597        if (eq_item->const_item() && eq_item->val_int())
16598          it.remove();
16599      }
16600      cond_arg_list->append((List<Item> *) cond_equalities);
16601   }
16602 
16603   List<Item_equal> new_equalities;
16604   List_iterator<Item> li(*cond_arg_list);
16605   bool should_fix_fields= 0;
16606   Item::cond_result tmp_cond_value;
16607   Item *item;
16608 
16609   /*
16610     If the list cond_arg_list became empty then it consisted only
16611     of always true multiple equalities.
16612   */
16613   *cond_value= cond_arg_list->elements ? Item::COND_UNDEF : Item::COND_TRUE;
16614 
16615   while ((item=li++))
16616   {
16617     Item *new_item= item->remove_eq_conds(thd, &tmp_cond_value, false);
16618     if (!new_item)
16619     {
16620       /* This can happen only when item is converted to TRUE or FALSE */
16621       li.remove();
16622     }
16623     else if (item != new_item)
16624     {
16625       /*
16626         This can happen when:
16627         - item was an OR formula converted to one disjunct
16628         - item was an AND formula converted to one conjunct
16629         In these cases the disjunct/conjunct must be merged into the
16630         argument list of cond.
16631       */
16632       if (new_item->type() == Item::COND_ITEM &&
16633           item->type() == Item::COND_ITEM)
16634       {
16635         DBUG_ASSERT(functype() == ((Item_cond *) new_item)->functype());
16636         List<Item> *new_item_arg_list=
16637           ((Item_cond *) new_item)->argument_list();
16638         if (and_level)
16639         {
16640           /*
16641             If new_item is an AND formula then multiple equalities
16642             of new_item_arg_list must merged into multiple equalities
16643             of cond_arg_list.
16644           */
16645           List<Item_equal> *new_item_equalities=
16646             &((Item_cond_and *) new_item)->m_cond_equal.current_level;
16647           if (!new_item_equalities->is_empty())
16648           {
16649             /*
16650               Cut the multiple equalities from the new_item_arg_list and
16651               append them on the list new_equalities. Later the equalities
16652               from this list will be merged into the multiple equalities
16653               of cond_arg_list all together.
16654             */
16655             new_item_arg_list->disjoin((List<Item> *) new_item_equalities);
16656             new_equalities.append(new_item_equalities);
16657           }
16658         }
16659         if (new_item_arg_list->is_empty())
16660           li.remove();
16661         else
16662         {
16663           uint cnt= new_item_arg_list->elements;
16664           li.replace(*new_item_arg_list);
16665           /* Make iterator li ignore new items */
16666           for (cnt--; cnt; cnt--)
16667             li++;
16668           should_fix_fields= 1;
16669         }
16670       }
16671       else if (and_level &&
16672                new_item->type() == Item::FUNC_ITEM &&
16673                ((Item_cond*) new_item)->functype() ==
16674                 Item_func::MULT_EQUAL_FUNC)
16675       {
16676         li.remove();
16677         new_equalities.push_back((Item_equal *) new_item, thd->mem_root);
16678       }
16679       else
16680       {
16681         if (new_item->type() == Item::COND_ITEM &&
16682             ((Item_cond*) new_item)->functype() ==  functype())
16683         {
16684           List<Item> *new_item_arg_list=
16685             ((Item_cond *) new_item)->argument_list();
16686           uint cnt= new_item_arg_list->elements;
16687           li.replace(*new_item_arg_list);
16688           /* Make iterator li ignore new items */
16689           for (cnt--; cnt; cnt--)
16690             li++;
16691         }
16692         else
16693           li.replace(new_item);
16694         should_fix_fields= 1;
16695       }
16696     }
16697     if (*cond_value == Item::COND_UNDEF)
16698       *cond_value= tmp_cond_value;
16699     switch (tmp_cond_value) {
16700     case Item::COND_OK:                        // Not TRUE or FALSE
16701       if (and_level || *cond_value == Item::COND_FALSE)
16702         *cond_value=tmp_cond_value;
16703       break;
16704     case Item::COND_FALSE:
16705       if (and_level)
16706       {
16707         *cond_value= tmp_cond_value;
16708         return (COND*) 0;                        // Always false
16709       }
16710       break;
16711     case Item::COND_TRUE:
16712       if (!and_level)
16713       {
16714         *cond_value= tmp_cond_value;
16715         return (COND*) 0;                        // Always true
16716       }
16717       break;
16718     case Item::COND_UNDEF:                        // Impossible
16719       break; /* purecov: deadcode */
16720     }
16721   }
16722   COND *cond= this;
16723   if (!new_equalities.is_empty())
16724   {
16725     DBUG_ASSERT(and_level);
16726     /*
16727       Merge multiple equalities that were cut from the results of
16728       simplification of OR formulas converted into AND formulas.
16729       These multiple equalities are to be merged into the
16730       multiple equalities of  cond_arg_list.
16731     */
16732     COND_EQUAL *cond_equal= &((Item_cond_and *) this)->m_cond_equal;
16733     List<Item_equal> *cond_equalities= &cond_equal->current_level;
16734     cond_arg_list->disjoin((List<Item> *) cond_equalities);
16735     Item_equal *equality;
16736     List_iterator_fast<Item_equal> it(new_equalities);
16737     while ((equality= it++))
16738     {
16739       equality->upper_levels= cond_equal->upper_levels;
16740       equality->merge_into_list(thd, cond_equalities, false, false);
16741       List_iterator_fast<Item_equal> ei(*cond_equalities);
16742       while ((equality= ei++))
16743       {
16744         if (equality->const_item() && !equality->val_int())
16745         {
16746           *cond_value= Item::COND_FALSE;
16747           return (COND*) 0;
16748         }
16749       }
16750     }
16751     cond_arg_list->append((List<Item> *) cond_equalities);
16752     /*
16753       Propagate the newly formed multiple equalities to
16754       the all AND/OR levels of cond
16755     */
16756     bool is_simplifiable_cond= false;
16757     propagate_new_equalities(thd, this, cond_equalities,
16758                              cond_equal->upper_levels,
16759                              &is_simplifiable_cond);
16760     /*
16761       If the above propagation of multiple equalities brings us
16762       to multiple equalities that are always FALSE then try to
16763       simplify the condition with remove_eq_cond() again.
16764     */
16765     if (is_simplifiable_cond)
16766     {
16767       if (!(cond= cond->remove_eq_conds(thd, cond_value, false)))
16768         return cond;
16769     }
16770     should_fix_fields= 1;
16771   }
16772   if (should_fix_fields)
16773     cond->update_used_tables();
16774 
16775   if (!((Item_cond*) cond)->argument_list()->elements ||
16776       *cond_value != Item::COND_OK)
16777     return (COND*) 0;
16778   if (((Item_cond*) cond)->argument_list()->elements == 1)
16779   {                                                // Remove list
16780     item= ((Item_cond*) cond)->argument_list()->head();
16781     ((Item_cond*) cond)->argument_list()->empty();
16782     return item;
16783   }
16784   *cond_value= Item::COND_OK;
16785   return cond;
16786 }
16787 
16788 
16789 COND *
remove_eq_conds(THD * thd,Item::cond_result * cond_value,bool top_level_arg)16790 Item::remove_eq_conds(THD *thd, Item::cond_result *cond_value, bool top_level_arg)
16791 {
16792   if (const_item() && !is_expensive())
16793   {
16794     *cond_value= eval_const_cond() ? Item::COND_TRUE : Item::COND_FALSE;
16795     return (COND*) 0;
16796   }
16797   *cond_value= Item::COND_OK;
16798   return this;                                        // Point at next and level
16799 }
16800 
16801 
16802 COND *
remove_eq_conds(THD * thd,Item::cond_result * cond_value,bool top_level_arg)16803 Item_bool_func2::remove_eq_conds(THD *thd, Item::cond_result *cond_value,
16804                                  bool top_level_arg)
16805 {
16806   if (const_item() && !is_expensive())
16807   {
16808     *cond_value= eval_const_cond() ? Item::COND_TRUE : Item::COND_FALSE;
16809     return (COND*) 0;
16810   }
16811   if ((*cond_value= eq_cmp_result()) != Item::COND_OK)
16812   {
16813     if (args[0]->eq(args[1], true))
16814     {
16815       if (!args[0]->maybe_null || functype() == Item_func::EQUAL_FUNC)
16816         return (COND*) 0;                       // Compare of identical items
16817     }
16818   }
16819   *cond_value= Item::COND_OK;
16820   return this;                                  // Point at next and level
16821 }
16822 
16823 
16824 /**
16825   Remove const and eq items. Return new item, or NULL if no condition
16826   cond_value is set to according:
16827   COND_OK    query is possible (field = constant)
16828   COND_TRUE  always true       ( 1 = 1 )
16829   COND_FALSE always false      ( 1 = 2 )
16830 
16831   SYNPOSIS
16832     remove_eq_conds()
16833     thd                         THD environment
16834     cond                        the condition to handle
16835     cond_value                  the resulting value of the condition
16836 
16837   NOTES
16838     calls the inner_remove_eq_conds to check all the tree reqursively
16839 
16840   RETURN
16841     *COND with the simplified condition
16842 */
16843 
16844 COND *
remove_eq_conds(THD * thd,Item::cond_result * cond_value,bool top_level_arg)16845 Item_func_isnull::remove_eq_conds(THD *thd, Item::cond_result *cond_value,
16846                                   bool top_level_arg)
16847 {
16848   Item *real_item= args[0]->real_item();
16849   if (real_item->type() == Item::FIELD_ITEM)
16850   {
16851     Field *field= ((Item_field*) real_item)->field;
16852 
16853     if (((field->type() == MYSQL_TYPE_DATE) ||
16854          (field->type() == MYSQL_TYPE_DATETIME)) &&
16855          (field->flags & NOT_NULL_FLAG))
16856     {
16857       /* fix to replace 'NULL' dates with '0' (shreeve@uci.edu) */
16858       /*
16859         See BUG#12594011
16860         Documentation says that
16861         SELECT datetime_notnull d FROM t1 WHERE d IS NULL
16862         shall return rows where d=='0000-00-00'
16863 
16864         Thus, for DATE and DATETIME columns defined as NOT NULL,
16865         "date_notnull IS NULL" has to be modified to
16866         "date_notnull IS NULL OR date_notnull == 0" (if outer join)
16867         "date_notnull == 0"                         (otherwise)
16868 
16869       */
16870 
16871       Item *item0= new(thd->mem_root) Item_int(thd, (longlong) 0, 1);
16872       Item *eq_cond= new(thd->mem_root) Item_func_eq(thd, args[0], item0);
16873       if (!eq_cond)
16874         return this;
16875 
16876       COND *cond= this;
16877       if (field->table->pos_in_table_list->is_inner_table_of_outer_join())
16878       {
16879         // outer join: transform "col IS NULL" to "col IS NULL or col=0"
16880         Item *or_cond= new(thd->mem_root) Item_cond_or(thd, eq_cond, this);
16881         if (!or_cond)
16882           return this;
16883         cond= or_cond;
16884       }
16885       else
16886       {
16887         // not outer join: transform "col IS NULL" to "col=0"
16888         cond= eq_cond;
16889       }
16890 
16891       cond->fix_fields(thd, &cond);
16892       /*
16893         Note: although args[0] is a field, cond can still be a constant
16894         (in case field is a part of a dependent subquery).
16895 
16896         Note: we call cond->Item::remove_eq_conds() non-virtually (statically)
16897         for performance purpose.
16898         A non-qualified call, i.e. just cond->remove_eq_conds(),
16899         would call Item_bool_func2::remove_eq_conds() instead, which would
16900         try to do some extra job to detect if args[0] and args[1] are
16901         equivalent items. We know they are not (we have field=0 here).
16902       */
16903       return cond->Item::remove_eq_conds(thd, cond_value, false);
16904     }
16905 
16906     /*
16907       Handles this special case for some ODBC applications:
16908       The are requesting the row that was just updated with a auto_increment
16909       value with this construct:
16910 
16911       SELECT * from table_name where auto_increment_column IS NULL
16912       This will be changed to:
16913       SELECT * from table_name where auto_increment_column = LAST_INSERT_ID
16914 
16915       Note, this substitution is done if the NULL test is the only condition!
16916       If the NULL test is a part of a more complex condition, it is not
16917       substituted and is treated normally:
16918         WHERE auto_increment IS NULL AND something_else
16919     */
16920 
16921     if (top_level_arg) // "auto_increment_column IS NULL" is the only condition
16922     {
16923       if (field->flags & AUTO_INCREMENT_FLAG && !field->table->maybe_null &&
16924           (thd->variables.option_bits & OPTION_AUTO_IS_NULL) &&
16925           (thd->first_successful_insert_id_in_prev_stmt > 0 &&
16926            thd->substitute_null_with_insert_id))
16927       {
16928   #ifdef HAVE_QUERY_CACHE
16929         query_cache_abort(thd, &thd->query_cache_tls);
16930   #endif
16931         COND *new_cond, *cond= this;
16932         /* If this fails, we will catch it later before executing query */
16933         if ((new_cond= new (thd->mem_root) Item_func_eq(thd, args[0],
16934                                         new (thd->mem_root) Item_int(thd, "last_insert_id()",
16935                                                      thd->read_first_successful_insert_id_in_prev_stmt(),
16936                                                      MY_INT64_NUM_DECIMAL_DIGITS))))
16937         {
16938           cond= new_cond;
16939           /*
16940             Item_func_eq can't be fixed after creation so we do not check
16941             cond->fixed, also it do not need tables so we use 0 as second
16942             argument.
16943           */
16944           cond->fix_fields(thd, &cond);
16945         }
16946         /*
16947           IS NULL should be mapped to LAST_INSERT_ID only for first row, so
16948           clear for next row
16949         */
16950         thd->substitute_null_with_insert_id= FALSE;
16951 
16952         *cond_value= Item::COND_OK;
16953         return cond;
16954       }
16955     }
16956   }
16957   return Item::remove_eq_conds(thd, cond_value, top_level_arg);
16958 }
16959 
16960 
16961 /**
16962   Check if equality can be used in removing components of GROUP BY/DISTINCT
16963 
16964   @param    l          the left comparison argument (a field if any)
16965   @param    r          the right comparison argument (a const of any)
16966 
16967   @details
16968   Checks if an equality predicate can be used to take away
16969   DISTINCT/GROUP BY because it is known to be true for exactly one
16970   distinct value (e.g. <expr> == <const>).
16971   Arguments must be compared in the native type of the left argument
16972   and (for strings) in the native collation of the left argument.
16973   Otherwise, for example,
16974   <string_field> = <int_const> may match more than 1 distinct value or
16975   the <string_field>.
16976 
16977   @note We don't need to aggregate l and r collations here, because r -
16978   the constant item - has already been converted to a proper collation
16979   for comparison. We only need to compare this collation with field's collation.
16980 
16981   @retval true    can be used
16982   @retval false   cannot be used
16983 */
16984 
16985 /*
16986   psergey-todo: this returns false for int_column='1234' (here '1234' is a
16987   constant. Need to discuss this with Bar).
16988 
16989   See also Field::test_if_equality_guaranees_uniqueness(const Item *item);
16990 */
16991 static bool
test_if_equality_guarantees_uniqueness(Item * l,Item * r)16992 test_if_equality_guarantees_uniqueness(Item *l, Item *r)
16993 {
16994   return (r->const_item() || !(r->used_tables() & ~OUTER_REF_TABLE_BIT)) &&
16995     item_cmp_type(l, r) == l->cmp_type() &&
16996     (l->cmp_type() != STRING_RESULT ||
16997      l->collation.collation == r->collation.collation);
16998 }
16999 
17000 
17001 /*
17002   Return TRUE if i1 and i2 (if any) are equal items,
17003   or if i1 is a wrapper item around the f2 field.
17004 */
17005 
equal(Item * i1,Item * i2,Field * f2)17006 static bool equal(Item *i1, Item *i2, Field *f2)
17007 {
17008   DBUG_ASSERT((i2 == NULL) ^ (f2 == NULL));
17009 
17010   if (i2 != NULL)
17011     return i1->eq(i2, 1);
17012   else if (i1->type() == Item::FIELD_ITEM)
17013     return f2->eq(((Item_field *) i1)->field);
17014   else
17015     return FALSE;
17016 }
17017 
17018 
17019 /**
17020   Test if a field or an item is equal to a constant value in WHERE
17021 
17022   @param        cond            WHERE clause expression
17023   @param        comp_item       Item to find in WHERE expression
17024                                 (if comp_field != NULL)
17025   @param        comp_field      Field to find in WHERE expression
17026                                 (if comp_item != NULL)
17027   @param[out]   const_item      intermediate arg, set to Item pointer to NULL
17028 
17029   @return TRUE if the field is a constant value in WHERE
17030 
17031   @note
17032     comp_item and comp_field parameters are mutually exclusive.
17033 */
17034 bool
const_expression_in_where(COND * cond,Item * comp_item,Field * comp_field,Item ** const_item)17035 const_expression_in_where(COND *cond, Item *comp_item, Field *comp_field,
17036                           Item **const_item)
17037 {
17038   DBUG_ASSERT((comp_item == NULL) ^ (comp_field == NULL));
17039 
17040   Item *intermediate= NULL;
17041   if (const_item == NULL)
17042     const_item= &intermediate;
17043 
17044   if (cond->type() == Item::COND_ITEM)
17045   {
17046     bool and_level= (((Item_cond*) cond)->functype()
17047 		     == Item_func::COND_AND_FUNC);
17048     List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
17049     Item *item;
17050     while ((item=li++))
17051     {
17052       bool res=const_expression_in_where(item, comp_item, comp_field,
17053                                          const_item);
17054       if (res)					// Is a const value
17055       {
17056 	if (and_level)
17057 	  return 1;
17058       }
17059       else if (!and_level)
17060 	return 0;
17061     }
17062     return and_level ? 0 : 1;
17063   }
17064   else if (cond->eq_cmp_result() != Item::COND_OK)
17065   {						// boolean compare function
17066     Item_func* func= (Item_func*) cond;
17067     if (func->functype() != Item_func::EQUAL_FUNC &&
17068 	func->functype() != Item_func::EQ_FUNC)
17069       return 0;
17070     Item *left_item=	((Item_func*) cond)->arguments()[0];
17071     Item *right_item= ((Item_func*) cond)->arguments()[1];
17072     if (equal(left_item, comp_item, comp_field))
17073     {
17074       if (test_if_equality_guarantees_uniqueness (left_item, right_item))
17075       {
17076 	if (*const_item)
17077 	  return right_item->eq(*const_item, 1);
17078 	*const_item=right_item;
17079 	return 1;
17080       }
17081     }
17082     else if (equal(right_item, comp_item, comp_field))
17083     {
17084       if (test_if_equality_guarantees_uniqueness (right_item, left_item))
17085       {
17086 	if (*const_item)
17087 	  return left_item->eq(*const_item, 1);
17088 	*const_item=left_item;
17089 	return 1;
17090       }
17091     }
17092   }
17093   return 0;
17094 }
17095 
17096 
17097 /****************************************************************************
17098   Create internal temporary table
17099 ****************************************************************************/
17100 
17101 /**
17102   Create field for temporary table from given field.
17103 
17104   @param thd	       Thread handler
17105   @param org_field    field from which new field will be created
17106   @param name         New field name
17107   @param table	       Temporary table
17108   @param item	       !=NULL if item->result_field should point to new field.
17109                       This is relevant for how fill_record() is going to work:
17110                       If item != NULL then fill_record() will update
17111                       the record in the original table.
17112                       If item == NULL then fill_record() will update
17113                       the temporary table
17114 
17115   @retval
17116     NULL		on error
17117   @retval
17118     new_created field
17119 */
17120 
create_tmp_field_from_field(THD * thd,Field * org_field,LEX_CSTRING * name,TABLE * table,Item_field * item)17121 Field *create_tmp_field_from_field(THD *thd, Field *org_field,
17122                                    LEX_CSTRING *name, TABLE *table,
17123                                    Item_field *item)
17124 {
17125   Field *new_field;
17126 
17127   new_field= org_field->make_new_field(thd->mem_root, table,
17128                                        table == org_field->table);
17129   if (new_field)
17130   {
17131     new_field->init(table);
17132     new_field->orig_table= org_field->orig_table;
17133     if (item)
17134       item->result_field= new_field;
17135     else
17136       new_field->field_name= *name;
17137     new_field->flags|= org_field->flags & NO_DEFAULT_VALUE_FLAG;
17138     if (org_field->maybe_null() || (item && item->maybe_null))
17139       new_field->flags&= ~NOT_NULL_FLAG;	// Because of outer join
17140     if (org_field->type() == MYSQL_TYPE_VAR_STRING ||
17141         org_field->type() == MYSQL_TYPE_VARCHAR)
17142       table->s->db_create_options|= HA_OPTION_PACK_RECORD;
17143     else if (org_field->type() == FIELD_TYPE_DOUBLE)
17144       ((Field_double *) new_field)->not_fixed= TRUE;
17145     new_field->vcol_info= 0;
17146     new_field->cond_selectivity= 1.0;
17147     new_field->next_equal_field= NULL;
17148     new_field->option_list= NULL;
17149     new_field->option_struct= NULL;
17150   }
17151   return new_field;
17152 }
17153 
17154 
create_tmp_field_int(TABLE * table,uint convert_int_length)17155 Field *Item::create_tmp_field_int(TABLE *table, uint convert_int_length)
17156 {
17157   const Type_handler *h= &type_handler_long;
17158   if (max_char_length() > convert_int_length)
17159     h= &type_handler_longlong;
17160   return h->make_and_init_table_field(&name, Record_addr(maybe_null),
17161                                       *this, table);
17162 }
17163 
17164 
create_tmp_field(bool group,TABLE * table)17165 Field *Item_sum::create_tmp_field(bool group, TABLE *table)
17166 {
17167   Field *UNINIT_VAR(new_field);
17168   MEM_ROOT *mem_root= table->in_use->mem_root;
17169 
17170   switch (cmp_type()) {
17171   case REAL_RESULT:
17172   {
17173     new_field= new (mem_root)
17174       Field_double(max_char_length(), maybe_null, &name, decimals, TRUE);
17175     break;
17176   }
17177   case INT_RESULT:
17178   case TIME_RESULT:
17179   case DECIMAL_RESULT:
17180   case STRING_RESULT:
17181     new_field= tmp_table_field_from_field_type(table);
17182     break;
17183   case ROW_RESULT:
17184     // This case should never be choosen
17185     DBUG_ASSERT(0);
17186     new_field= 0;
17187     break;
17188   }
17189   if (new_field)
17190     new_field->init(table);
17191   return new_field;
17192 }
17193 
17194 
create_tmp_field_from_item_finalize(THD * thd,Field * new_field,Item * item,Item *** copy_func,bool modify_item)17195 static void create_tmp_field_from_item_finalize(THD *thd,
17196                                                 Field *new_field,
17197                                                 Item *item,
17198                                                 Item ***copy_func,
17199                                                 bool modify_item)
17200 {
17201   if (copy_func &&
17202       (item->is_result_field() ||
17203        (item->real_item()->is_result_field())))
17204     *((*copy_func)++) = item;			// Save for copy_funcs
17205   if (modify_item)
17206     item->set_result_field(new_field);
17207   if (item->type() == Item::NULL_ITEM)
17208     new_field->is_created_from_null_item= TRUE;
17209 }
17210 
17211 
17212 /**
17213   Create field for temporary table using type of given item.
17214 
17215   @param thd                   Thread handler
17216   @param item                  Item to create a field for
17217   @param table                 Temporary table
17218   @param copy_func             If set and item is a function, store copy of
17219                                item in this array
17220   @param modify_item           1 if item->result_field should point to new
17221                                item. This is relevent for how fill_record()
17222                                is going to work:
17223                                If modify_item is 1 then fill_record() will
17224                                update the record in the original table.
17225                                If modify_item is 0 then fill_record() will
17226                                update the temporary table
17227 
17228   @retval
17229     0  on error
17230   @retval
17231     new_created field
17232 */
17233 
create_tmp_field_from_item(THD * thd,Item * item,TABLE * table,Item *** copy_func,bool modify_item)17234 static Field *create_tmp_field_from_item(THD *thd, Item *item, TABLE *table,
17235                                          Item ***copy_func, bool modify_item)
17236 {
17237   DBUG_ASSERT(thd == table->in_use);
17238   Field* new_field= item->create_tmp_field(false, table);
17239   if (new_field)
17240     create_tmp_field_from_item_finalize(thd, new_field, item,
17241                                         copy_func, modify_item);
17242   return new_field;
17243 }
17244 
17245 
17246 /**
17247   Create field for information schema table.
17248 
17249   @param thd		Thread handler
17250   @param table		Temporary table
17251   @param item		Item to create a field for
17252 
17253   @retval
17254     0			on error
17255   @retval
17256     new_created field
17257 */
17258 
create_field_for_schema(THD * thd,TABLE * table)17259 Field *Item::create_field_for_schema(THD *thd, TABLE *table)
17260 {
17261   if (field_type() == MYSQL_TYPE_VARCHAR)
17262   {
17263     Field *field;
17264     if (max_length > MAX_FIELD_VARCHARLENGTH)
17265       field= new Field_blob(max_length, maybe_null, &name,
17266                             collation.collation);
17267     else
17268       field= new Field_varstring(max_length, maybe_null, &name,
17269                                  table->s, collation.collation);
17270     if (field)
17271       field->init(table);
17272     return field;
17273   }
17274   return tmp_table_field_from_field_type(table);
17275 }
17276 
17277 
17278 /**
17279   Create field for temporary table.
17280 
17281   @param thd		Thread handler
17282   @param table		Temporary table
17283   @param item		Item to create a field for
17284   @param type		Type of item (normally item->type)
17285   @param copy_func	If set and item is a function, store copy of item
17286                        in this array
17287   @param from_field    if field will be created using other field as example,
17288                        pointer example field will be written here
17289   @param default_field	If field has a default value field, store it here
17290   @param group		1 if we are going to do a relative group by on result
17291   @param modify_item	1 if item->result_field should point to new item.
17292                        This is relevent for how fill_record() is going to
17293                        work:
17294                        If modify_item is 1 then fill_record() will update
17295                        the record in the original table.
17296                        If modify_item is 0 then fill_record() will update
17297                        the temporary table
17298   @param table_cant_handle_bit_fields
17299                        Set to 1 if the temporary table cannot handle bit
17300                        fields. Only set for heap tables when the bit field
17301                        is part of an index.
17302   @param make_copy_field
17303                        Set when using with rollup when we want to have
17304                        an exact copy of the field.
17305   @retval
17306     0			on error
17307   @retval
17308     new_created field
17309 */
17310 
create_tmp_field(THD * thd,TABLE * table,Item * item,Item::Type type,Item *** copy_func,Field ** from_field,Field ** default_field,bool group,bool modify_item,bool table_cant_handle_bit_fields,bool make_copy_field)17311 Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type,
17312                         Item ***copy_func, Field **from_field,
17313                         Field **default_field,
17314                         bool group, bool modify_item,
17315                         bool table_cant_handle_bit_fields,
17316                         bool make_copy_field)
17317 {
17318   Field *result;
17319   Item::Type orig_type= type;
17320   Item *orig_item= 0;
17321 
17322   DBUG_ASSERT(thd == table->in_use);
17323 
17324   if (type != Item::FIELD_ITEM &&
17325       item->real_item()->type() == Item::FIELD_ITEM)
17326   {
17327     orig_item= item;
17328     item= item->real_item();
17329     type= Item::FIELD_ITEM;
17330   }
17331 
17332   switch (type) {
17333   case Item::TYPE_HOLDER:
17334   case Item::SUM_FUNC_ITEM:
17335   {
17336     result= item->create_tmp_field(group, table);
17337     if (!result)
17338       my_error(ER_OUT_OF_RESOURCES, MYF(ME_FATALERROR));
17339     return result;
17340   }
17341   case Item::DEFAULT_VALUE_ITEM:
17342   {
17343     Field *field= ((Item_default_value*) item)->field;
17344     if (field->default_value && (field->flags & BLOB_FLAG))
17345     {
17346       /*
17347         We have to use a copy function when using a blob with default value
17348         as the we have to calcuate the default value before we can use it.
17349       */
17350       return create_tmp_field_from_item(thd, item, table,
17351                                         (make_copy_field ? 0 : copy_func),
17352                                         modify_item);
17353     }
17354   }
17355   /* Fall through */
17356   case Item::FIELD_ITEM:
17357   case Item::CONTEXTUALLY_TYPED_VALUE_ITEM:
17358   case Item::INSERT_VALUE_ITEM:
17359   case Item::TRIGGER_FIELD_ITEM:
17360   {
17361     Item_field *field= (Item_field*) item;
17362     bool orig_modify= modify_item;
17363     if (orig_type == Item::REF_ITEM)
17364       modify_item= 0;
17365     /*
17366       If item have to be able to store NULLs but underlaid field can't do it,
17367       create_tmp_field_from_field() can't be used for tmp field creation.
17368     */
17369     if (((field->maybe_null && field->in_rollup) ||
17370 	(thd->create_tmp_table_for_derived  &&    /* for mat. view/dt */
17371 	 orig_item && orig_item->maybe_null)) &&
17372         !field->field->maybe_null())
17373     {
17374       bool save_maybe_null= FALSE;
17375       /*
17376         The item the ref points to may have maybe_null flag set while
17377         the ref doesn't have it. This may happen for outer fields
17378         when the outer query decided at some point after name resolution phase
17379         that this field might be null. Take this into account here.
17380       */
17381       if (orig_item)
17382       {
17383         save_maybe_null= item->maybe_null;
17384         item->maybe_null= orig_item->maybe_null;
17385       }
17386       result= create_tmp_field_from_item(thd, item, table, NULL,
17387                                          modify_item);
17388       *from_field= field->field;
17389       if (result && modify_item)
17390         field->result_field= result;
17391       if (orig_item)
17392       {
17393         item->maybe_null= save_maybe_null;
17394         result->field_name= orig_item->name;
17395       }
17396     }
17397     else if (table_cant_handle_bit_fields && field->field->type() ==
17398              MYSQL_TYPE_BIT)
17399     {
17400       const Type_handler *handler= item->type_handler_long_or_longlong();
17401       *from_field= field->field;
17402       if ((result=
17403              handler->make_and_init_table_field(&item->name,
17404                                                 Record_addr(item->maybe_null),
17405                                                 *item, table)))
17406         create_tmp_field_from_item_finalize(thd, result, item,
17407                                             copy_func, modify_item);
17408       if (result && modify_item)
17409         field->result_field= result;
17410     }
17411     else
17412     {
17413       LEX_CSTRING *tmp= orig_item ? &orig_item->name : &item->name;
17414       result= create_tmp_field_from_field(thd, (*from_field= field->field),
17415                                           tmp, table,
17416                                           modify_item ? field :
17417                                           NULL);
17418     }
17419 
17420     if (orig_type == Item::REF_ITEM && orig_modify)
17421       ((Item_ref*)orig_item)->set_result_field(result);
17422     /*
17423       Fields that are used as arguments to the DEFAULT() function already have
17424       their data pointers set to the default value during name resolution. See
17425       Item_default_value::fix_fields.
17426     */
17427     if (orig_type != Item::DEFAULT_VALUE_ITEM && field->field->eq_def(result))
17428       *default_field= field->field;
17429     return result;
17430   }
17431   /* Fall through */
17432   case Item::FUNC_ITEM:
17433     if (((Item_func *) item)->functype() == Item_func::FUNC_SP)
17434     {
17435       Item_func_sp *item_func_sp= (Item_func_sp *) item;
17436       Field *sp_result_field= item_func_sp->get_sp_result_field();
17437 
17438       if (make_copy_field)
17439       {
17440         DBUG_ASSERT(item_func_sp->result_field);
17441         *from_field= item_func_sp->result_field;
17442       }
17443       else
17444       {
17445         *((*copy_func)++)= item;
17446       }
17447       Field *result_field=
17448         create_tmp_field_from_field(thd,
17449                                     sp_result_field,
17450                                     &item_func_sp->name,
17451                                     table,
17452                                     NULL);
17453 
17454       if (modify_item)
17455         item->set_result_field(result_field);
17456 
17457       return result_field;
17458     }
17459 
17460     /* Fall through */
17461   case Item::COND_ITEM:
17462   case Item::SUBSELECT_ITEM:
17463   case Item::REF_ITEM:
17464   case Item::EXPR_CACHE_ITEM:
17465     if (make_copy_field)
17466     {
17467       DBUG_ASSERT(((Item_result_field*)item)->result_field);
17468       *from_field= ((Item_result_field*)item)->result_field;
17469     }
17470     /* Fall through */
17471   case Item::FIELD_AVG_ITEM:
17472   case Item::FIELD_STD_ITEM:
17473   case Item::PROC_ITEM:
17474   case Item::INT_ITEM:
17475   case Item::REAL_ITEM:
17476   case Item::DECIMAL_ITEM:
17477   case Item::STRING_ITEM:
17478   case Item::DATE_ITEM:
17479   case Item::NULL_ITEM:
17480   case Item::VARBIN_ITEM:
17481   case Item::CACHE_ITEM:
17482   case Item::WINDOW_FUNC_ITEM: // psergey-winfunc:
17483   case Item::PARAM_ITEM:
17484     return create_tmp_field_from_item(thd, item, table,
17485                                       (make_copy_field ? 0 : copy_func),
17486                                        modify_item);
17487   default:					// Dosen't have to be stored
17488     return 0;
17489   }
17490 }
17491 
17492 /*
17493   Set up column usage bitmaps for a temporary table
17494 
17495   IMPLEMENTATION
17496     For temporary tables, we need one bitmap with all columns set and
17497     a tmp_set bitmap to be used by things like filesort.
17498 */
17499 
17500 void
setup_tmp_table_column_bitmaps(TABLE * table,uchar * bitmaps,uint field_count)17501 setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps, uint field_count)
17502 {
17503   uint bitmap_size= bitmap_buffer_size(field_count);
17504 
17505   DBUG_ASSERT(table->s->virtual_fields == 0 && table->def_vcol_set == 0);
17506 
17507   my_bitmap_init(&table->def_read_set, (my_bitmap_map*) bitmaps, field_count,
17508               FALSE);
17509   bitmaps+= bitmap_size;
17510   my_bitmap_init(&table->tmp_set,
17511                  (my_bitmap_map*) bitmaps, field_count, FALSE);
17512   bitmaps+= bitmap_size;
17513   my_bitmap_init(&table->eq_join_set,
17514                  (my_bitmap_map*) bitmaps, field_count, FALSE);
17515   bitmaps+= bitmap_size;
17516   my_bitmap_init(&table->cond_set,
17517                  (my_bitmap_map*) bitmaps, field_count, FALSE);
17518   bitmaps+= bitmap_size;
17519   my_bitmap_init(&table->has_value_set,
17520                  (my_bitmap_map*) bitmaps, field_count, FALSE);
17521   /* write_set and all_set are copies of read_set */
17522   table->def_write_set= table->def_read_set;
17523   table->s->all_set= table->def_read_set;
17524   bitmap_set_all(&table->s->all_set);
17525   table->default_column_bitmaps();
17526 }
17527 
17528 
17529 void
setup_tmp_table_column_bitmaps(TABLE * table,uchar * bitmaps)17530 setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps)
17531 {
17532   setup_tmp_table_column_bitmaps(table, bitmaps, table->s->fields);
17533 }
17534 
17535 
17536 /**
17537   Create a temp table according to a field list.
17538 
17539   Given field pointers are changed to point at tmp_table for
17540   send_result_set_metadata. The table object is self contained: it's
17541   allocated in its own memory root, as well as Field objects
17542   created for table columns.
17543   This function will replace Item_sum items in 'fields' list with
17544   corresponding Item_field items, pointing at the fields in the
17545   temporary table, unless this was prohibited by TRUE
17546   value of argument save_sum_fields. The Item_field objects
17547   are created in THD memory root.
17548 
17549   @param thd                  thread handle
17550   @param param                a description used as input to create the table
17551   @param fields               list of items that will be used to define
17552                               column types of the table (also see NOTES)
  @param group                Create a unique key over all group by fields.
                              This is used to retrieve the row during
                              end_write_group() and update them.
17556   @param distinct             should table rows be distinct
17557   @param save_sum_fields      see NOTES
  @param select_options       Options for how the select is run.
17559                               See sql_priv.h for a list of options.
17560   @param rows_limit           Maximum number of rows to insert into the
17561                               temporary table
17562   @param table_alias          possible name of the temporary table that can
17563                               be used for name resolving; can be "".
17564   @param do_not_open          only create the TABLE object, do not
17565                               open the table in the engine
17566   @param keep_row_order       rows need to be read in the order they were
17567                               inserted, the engine should preserve this order
17568 */
17569 
17570 TABLE *
create_tmp_table(THD * thd,TMP_TABLE_PARAM * param,List<Item> & fields,ORDER * group,bool distinct,bool save_sum_fields,ulonglong select_options,ha_rows rows_limit,const LEX_CSTRING * table_alias,bool do_not_open,bool keep_row_order)17571 create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List<Item> &fields,
17572 		 ORDER *group, bool distinct, bool save_sum_fields,
17573 		 ulonglong select_options, ha_rows rows_limit,
17574                  const LEX_CSTRING *table_alias, bool do_not_open,
17575                  bool keep_row_order)
17576 {
17577   MEM_ROOT *mem_root_save, own_root;
17578   TABLE *table;
17579   TABLE_SHARE *share;
17580   uint	i,field_count,null_count,null_pack_length;
17581   uint  copy_func_count= param->func_count;
17582   uint  hidden_null_count, hidden_null_pack_length, hidden_field_count;
17583   uint  blob_count,group_null_items, string_count;
17584   uint  temp_pool_slot=MY_BIT_NONE;
17585   uint fieldnr= 0;
17586   ulong reclength, string_total_length;
17587   bool  using_unique_constraint= false;
17588   bool  use_packed_rows= false;
17589   bool  not_all_columns= !(select_options & TMP_TABLE_ALL_COLUMNS);
17590   bool  save_abort_on_warning;
17591   char  *tmpname,path[FN_REFLEN];
17592   uchar	*pos, *group_buff, *bitmaps;
17593   uchar *null_flags;
17594   Field **reg_field, **from_field, **default_field;
17595   uint *blob_field;
17596   Copy_field *copy=0;
17597   KEY *keyinfo;
17598   KEY_PART_INFO *key_part_info;
17599   Item **copy_func;
17600   TMP_ENGINE_COLUMNDEF *recinfo;
17601   /*
17602     total_uneven_bit_length is uneven bit length for visible fields
17603     hidden_uneven_bit_length is uneven bit length for hidden fields
17604   */
17605   uint total_uneven_bit_length= 0, hidden_uneven_bit_length= 0;
17606   bool force_copy_fields= param->force_copy_fields;
17607   /* Treat sum functions as normal ones when loose index scan is used. */
17608   save_sum_fields|= param->precomputed_group_by;
17609   DBUG_ENTER("create_tmp_table");
17610   DBUG_PRINT("enter",
17611              ("table_alias: '%s'  distinct: %d  save_sum_fields: %d  "
17612               "rows_limit: %lu  group: %d", table_alias->str,
17613               (int) distinct, (int) save_sum_fields,
17614               (ulong) rows_limit, MY_TEST(group)));
17615 
17616   if (use_temp_pool && !(test_flags & TEST_KEEP_TMP_TABLES))
17617     temp_pool_slot = bitmap_lock_set_next(&temp_pool);
17618 
17619   if (temp_pool_slot != MY_BIT_NONE) // we got a slot
17620     sprintf(path, "%s_%lx_%i", tmp_file_prefix,
17621             current_pid, temp_pool_slot);
17622   else
17623   {
17624     /* if we run out of slots or we are not using tempool */
17625     sprintf(path, "%s%lx_%lx_%x", tmp_file_prefix,current_pid,
17626             (ulong) thd->thread_id, thd->tmp_table++);
17627   }
17628 
17629   /*
17630     No need to change table name to lower case as we are only creating
17631     MyISAM, Aria or HEAP tables here
17632   */
17633   fn_format(path, path, mysql_tmpdir, "", MY_REPLACE_EXT|MY_UNPACK_FILENAME);
17634 
17635   if (group)
17636   {
17637     ORDER **prev= &group;
17638     if (!param->quick_group)
17639       group=0;					// Can't use group key
17640     else for (ORDER *tmp=group ; tmp ; tmp=tmp->next)
17641     {
17642       /* Exclude found constant from the list */
17643       if ((*tmp->item)->const_item())
17644       {
17645         *prev= tmp->next;
17646         param->group_parts--;
17647         continue;
17648       }
17649       else
17650         prev= &(tmp->next);
17651       /*
17652         marker == 4 means two things:
17653         - store NULLs in the key, and
17654         - convert BIT fields to 64-bit long, needed because MEMORY tables
17655           can't index BIT fields.
17656       */
17657       (*tmp->item)->marker=4;			// Store null in key
17658       if ((*tmp->item)->too_big_for_varchar())
17659 	using_unique_constraint= true;
17660     }
17661     if (param->group_length >= MAX_BLOB_WIDTH)
17662       using_unique_constraint= true;
17663     if (group)
17664       distinct=0;				// Can't use distinct
17665   }
17666 
17667   field_count=param->field_count+param->func_count+param->sum_func_count;
17668   hidden_field_count=param->hidden_field_count;
17669 
17670   /*
17671     When loose index scan is employed as access method, it already
17672     computes all groups and the result of all aggregate functions. We
17673     make space for the items of the aggregate function in the list of
17674     functions TMP_TABLE_PARAM::items_to_copy, so that the values of
17675     these items are stored in the temporary table.
17676   */
17677   if (param->precomputed_group_by)
17678     copy_func_count+= param->sum_func_count;
17679 
17680   init_sql_alloc(&own_root, "tmp_table", TABLE_ALLOC_BLOCK_SIZE, 0,
17681                  MYF(MY_THREAD_SPECIFIC));
17682 
17683   if (!multi_alloc_root(&own_root,
17684                         &table, sizeof(*table),
17685                         &share, sizeof(*share),
17686                         &reg_field, sizeof(Field*) * (field_count+1),
17687                         &default_field, sizeof(Field*) * (field_count),
17688                         &blob_field, sizeof(uint)*(field_count+1),
17689                         &from_field, sizeof(Field*)*field_count,
17690                         &copy_func, sizeof(*copy_func)*(copy_func_count+1),
17691                         &param->keyinfo, sizeof(*param->keyinfo),
17692                         &key_part_info,
17693                         sizeof(*key_part_info)*(param->group_parts+1),
17694                         &param->start_recinfo,
17695                         sizeof(*param->recinfo)*(field_count*2+4),
17696                         &tmpname, (uint) strlen(path)+1,
17697                         &group_buff, (group && ! using_unique_constraint ?
17698                                       param->group_length : 0),
17699                         &bitmaps, bitmap_buffer_size(field_count)*6,
17700                         NullS))
17701   {
17702     if (temp_pool_slot != MY_BIT_NONE)
17703       bitmap_lock_clear_bit(&temp_pool, temp_pool_slot);
17704     DBUG_RETURN(NULL);				/* purecov: inspected */
17705   }
17706   /* Copy_field belongs to TMP_TABLE_PARAM, allocate it in THD mem_root */
17707   if (!(param->copy_field= copy= new (thd->mem_root) Copy_field[field_count]))
17708   {
17709     if (temp_pool_slot != MY_BIT_NONE)
17710       bitmap_lock_clear_bit(&temp_pool, temp_pool_slot);
17711     free_root(&own_root, MYF(0));               /* purecov: inspected */
17712     DBUG_RETURN(NULL);				/* purecov: inspected */
17713   }
17714   param->items_to_copy= copy_func;
17715   strmov(tmpname, path);
17716   /* make table according to fields */
17717 
17718   bzero((char*) table,sizeof(*table));
17719   bzero((char*) reg_field,sizeof(Field*)*(field_count+1));
17720   bzero((char*) default_field, sizeof(Field*) * (field_count));
17721   bzero((char*) from_field,sizeof(Field*)*field_count);
17722 
17723   table->mem_root= own_root;
17724   mem_root_save= thd->mem_root;
17725   thd->mem_root= &table->mem_root;
17726 
17727   table->field=reg_field;
17728   table->alias.set(table_alias->str, table_alias->length, table_alias_charset);
17729 
17730   table->reginfo.lock_type=TL_WRITE;	/* Will be updated */
17731   table->map=1;
17732   table->temp_pool_slot = temp_pool_slot;
17733   table->copy_blobs= 1;
17734   table->in_use= thd;
17735   table->quick_keys.init();
17736   table->covering_keys.init();
17737   table->intersect_keys.init();
17738   table->keys_in_use_for_query.init();
17739   table->no_rows_with_nulls= param->force_not_null_cols;
17740 
17741   table->s= share;
17742   init_tmp_table_share(thd, share, "", 0, "(temporary)", tmpname);
17743   share->blob_field= blob_field;
17744   share->table_charset= param->table_charset;
17745   share->primary_key= MAX_KEY;               // Indicate no primary key
17746   share->keys_for_keyread.init();
17747   share->keys_in_use.init();
17748   if (param->schema_table)
17749     share->db= INFORMATION_SCHEMA_NAME;
17750 
17751   /* Calculate which type of fields we will store in the temporary table */
17752 
17753   reclength= string_total_length= 0;
17754   blob_count= string_count= null_count= hidden_null_count= group_null_items= 0;
17755   param->using_outer_summary_function= 0;
17756 
17757   List_iterator_fast<Item> li(fields);
17758   Item *item;
17759   Field **tmp_from_field=from_field;
17760   while ((item=li++))
17761   {
17762     Item::Type type= item->type();
17763     if (type == Item::COPY_STR_ITEM)
17764     {
17765       item= ((Item_copy *)item)->get_item();
17766       type= item->type();
17767     }
17768     if (not_all_columns)
17769     {
17770       if (item->with_sum_func && type != Item::SUM_FUNC_ITEM)
17771       {
17772         if (item->used_tables() & OUTER_REF_TABLE_BIT)
17773           item->update_used_tables();
17774         if ((item->real_type() == Item::SUBSELECT_ITEM) ||
17775             (item->used_tables() & ~OUTER_REF_TABLE_BIT))
17776         {
17777 	  /*
17778 	    Mark that the we have ignored an item that refers to a summary
17779 	    function. We need to know this if someone is going to use
17780 	    DISTINCT on the result.
17781 	  */
17782 	  param->using_outer_summary_function=1;
17783 	  continue;
17784         }
17785       }
17786       if (item->const_item() && (int) hidden_field_count <= 0)
17787         continue; // We don't have to store this
17788     }
17789     if (type == Item::SUM_FUNC_ITEM && !group && !save_sum_fields)
17790     {						/* Can't calc group yet */
17791       Item_sum *sum_item= (Item_sum *) item;
17792       sum_item->result_field=0;
17793       for (i=0 ; i < sum_item->get_arg_count() ; i++)
17794       {
17795 	Item *arg= sum_item->get_arg(i);
17796 	if (!arg->const_item())
17797 	{
17798           Item *tmp_item;
17799           Field *new_field=
17800             create_tmp_field(thd, table, arg, arg->type(), &copy_func,
17801                              tmp_from_field, &default_field[fieldnr],
17802                              group != 0,not_all_columns,
17803                              distinct, false);
17804 	  if (!new_field)
17805 	    goto err;					// Should be OOM
17806           DBUG_ASSERT(!new_field->field_name.str || strlen(new_field->field_name.str) == new_field->field_name.length);
17807 	  tmp_from_field++;
17808 	  reclength+=new_field->pack_length();
17809 	  if (new_field->flags & BLOB_FLAG)
17810 	  {
17811 	    *blob_field++= fieldnr;
17812 	    blob_count++;
17813 	  }
17814           if (new_field->type() == MYSQL_TYPE_BIT)
17815             total_uneven_bit_length+= new_field->field_length & 7;
17816 	  *(reg_field++)= new_field;
17817           if (new_field->real_type() == MYSQL_TYPE_STRING ||
17818               new_field->real_type() == MYSQL_TYPE_VARCHAR)
17819           {
17820             string_count++;
17821             string_total_length+= new_field->pack_length();
17822           }
17823           thd->mem_root= mem_root_save;
17824           if (!(tmp_item= new (thd->mem_root)
17825                 Item_temptable_field(thd, new_field)))
17826             goto err;
17827           arg= sum_item->set_arg(i, thd, tmp_item);
17828           thd->mem_root= &table->mem_root;
17829           if (param->force_not_null_cols)
17830 	  {
17831             new_field->flags|= NOT_NULL_FLAG;
17832             new_field->null_ptr= NULL;
17833           }
17834 	  if (!(new_field->flags & NOT_NULL_FLAG))
17835           {
17836 	    null_count++;
17837             /*
17838               new_field->maybe_null() is still false, it will be
17839               changed below. But we have to setup Item_field correctly
17840             */
17841             arg->maybe_null=1;
17842           }
17843           new_field->field_index= fieldnr++;
17844 	}
17845       }
17846     }
17847     else
17848     {
17849       /*
17850 	The last parameter to create_tmp_field() is a bit tricky:
17851 
17852 	We need to set it to 0 in union, to get fill_record() to modify the
17853 	temporary table.
17854 	We need to set it to 1 on multi-table-update and in select to
17855 	write rows to the temporary table.
17856 	We here distinguish between UNION and multi-table-updates by the fact
17857 	that in the later case group is set to the row pointer.
17858 
17859         The test for item->marker == 4 is ensure we don't create a group-by
17860         key over a bit field as heap tables can't handle that.
17861       */
17862       Field *new_field;
17863       if (param->schema_table)
17864       {
17865         if ((new_field= item->create_field_for_schema(thd, table)))
17866           new_field->flags|= NO_DEFAULT_VALUE_FLAG;
17867       }
17868       else
17869       {
17870         new_field=
17871         create_tmp_field(thd, table, item, type, &copy_func,
17872                          tmp_from_field, &default_field[fieldnr],
17873                          group != 0,
17874                          !force_copy_fields &&
17875                            (not_all_columns || group !=0),
17876                          /*
17877                            If item->marker == 4 then we force create_tmp_field
17878                            to create a 64-bit longs for BIT fields because HEAP
17879                            tables can't index BIT fields directly. We do the
17880                            same for distinct, as we want the distinct index
17881                            to be usable in this case too.
17882                          */
17883                          item->marker == 4  || param->bit_fields_as_long,
17884                          force_copy_fields);
17885       }
17886       if (unlikely(!new_field))
17887       {
17888 	if (unlikely(thd->is_fatal_error))
17889 	  goto err;				// Got OOM
17890 	continue;				// Some kind of const item
17891       }
17892       DBUG_ASSERT(!new_field->field_name.str || strlen(new_field->field_name.str) == new_field->field_name.length);
17893       if (type == Item::SUM_FUNC_ITEM)
17894       {
17895         Item_sum *agg_item= (Item_sum *) item;
17896         /*
17897           Update the result field only if it has never been set, or if the
17898           created temporary table is not to be used for subquery
17899           materialization.
17900 
17901           The reason is that for subqueries that require
17902           materialization as part of their plan, we create the
17903           'external' temporary table needed for IN execution, after
17904           the 'internal' temporary table needed for grouping.  Since
17905           both the external and the internal temporary tables are
17906           created for the same list of SELECT fields of the subquery,
17907           setting 'result_field' for each invocation of
17908           create_tmp_table overrides the previous value of
17909           'result_field'.
17910 
17911           The condition below prevents the creation of the external
17912           temp table to override the 'result_field' that was set for
17913           the internal temp table.
17914         */
17915         if (!agg_item->result_field || !param->materialized_subquery)
17916           agg_item->result_field= new_field;
17917       }
17918       tmp_from_field++;
17919       if (param->force_not_null_cols)
17920       {
17921         new_field->flags|= NOT_NULL_FLAG;
17922         new_field->null_ptr= NULL;
17923       }
17924       reclength+=new_field->pack_length();
17925       if (!(new_field->flags & NOT_NULL_FLAG))
17926 	null_count++;
17927       if (new_field->type() == MYSQL_TYPE_BIT)
17928         total_uneven_bit_length+= new_field->field_length & 7;
17929       if (new_field->flags & BLOB_FLAG)
17930       {
17931         *blob_field++= fieldnr;
17932 	blob_count++;
17933       }
17934 
17935       if (new_field->real_type() == MYSQL_TYPE_STRING ||
17936           new_field->real_type() == MYSQL_TYPE_VARCHAR)
17937       {
17938         string_count++;
17939         string_total_length+= new_field->pack_length();
17940       }
17941 
17942       if (item->marker == 4 && item->maybe_null)
17943       {
17944 	group_null_items++;
17945 	new_field->flags|= GROUP_FLAG;
17946       }
17947       new_field->field_index= fieldnr++;
17948       *(reg_field++)= new_field;
17949     }
17950     if (!--hidden_field_count)
17951     {
17952       /*
17953         This was the last hidden field; Remember how many hidden fields could
17954         have null
17955       */
17956       hidden_null_count=null_count;
17957       /*
17958 	We need to update hidden_field_count as we may have stored group
17959 	functions with constant arguments
17960       */
17961       param->hidden_field_count= fieldnr;
17962       null_count= 0;
17963       /*
17964         On last hidden field we store uneven bit length in
17965         hidden_uneven_bit_length and proceed calculation of
17966         uneven bits for visible fields into
17967         total_uneven_bit_length variable.
17968       */
17969       hidden_uneven_bit_length= total_uneven_bit_length;
17970       total_uneven_bit_length= 0;
17971     }
17972   }
17973   DBUG_ASSERT(fieldnr == (uint) (reg_field - table->field));
17974   DBUG_ASSERT(field_count >= (uint) (reg_field - table->field));
17975   field_count= fieldnr;
17976   *reg_field= 0;
17977   *blob_field= 0;				// End marker
17978   share->fields= field_count;
17979   share->column_bitmap_size= bitmap_buffer_size(share->fields);
17980 
17981   /* If result table is small; use a heap */
17982   /* future: storage engine selection can be made dynamic? */
17983   if (blob_count || using_unique_constraint
17984       || (thd->variables.big_tables && !(select_options & SELECT_SMALL_RESULT))
17985       || (select_options & TMP_TABLE_FORCE_MYISAM)
17986       || thd->variables.tmp_memory_table_size == 0)
17987   {
17988     share->db_plugin= ha_lock_engine(0, TMP_ENGINE_HTON);
17989     table->file= get_new_handler(share, &table->mem_root,
17990                                  share->db_type());
17991     if (group &&
17992 	(param->group_parts > table->file->max_key_parts() ||
17993 	 param->group_length > table->file->max_key_length()))
17994       using_unique_constraint= true;
17995   }
17996   else
17997   {
17998     share->db_plugin= ha_lock_engine(0, heap_hton);
17999     table->file= get_new_handler(share, &table->mem_root,
18000                                  share->db_type());
18001   }
18002   if (!table->file)
18003     goto err;
18004 
18005   if (table->file->set_ha_share_ref(&share->ha_share))
18006   {
18007     delete table->file;
18008     goto err;
18009   }
18010 
18011   if (!using_unique_constraint)
18012     reclength+= group_null_items;	// null flag is stored separately
18013 
18014   share->blob_fields= blob_count;
18015   if (blob_count == 0)
18016   {
18017     /* We need to ensure that first byte is not 0 for the delete link */
18018     if (param->hidden_field_count)
18019       hidden_null_count++;
18020     else
18021       null_count++;
18022   }
18023   hidden_null_pack_length= (hidden_null_count + 7 +
18024                             hidden_uneven_bit_length) / 8;
18025   null_pack_length= (hidden_null_pack_length +
18026                      (null_count + total_uneven_bit_length + 7) / 8);
18027   reclength+=null_pack_length;
18028   if (!reclength)
18029     reclength=1;				// Dummy select
18030   /* Use packed rows if there is blobs or a lot of space to gain */
18031   if (blob_count ||
18032       (string_total_length >= STRING_TOTAL_LENGTH_TO_PACK_ROWS &&
18033        (reclength / string_total_length <= RATIO_TO_PACK_ROWS ||
18034         string_total_length / string_count >= AVG_STRING_LENGTH_TO_PACK_ROWS)))
18035     use_packed_rows= 1;
18036 
18037   share->reclength= reclength;
18038   {
18039     uint alloc_length=ALIGN_SIZE(reclength+MI_UNIQUE_HASH_LENGTH+1);
18040     share->rec_buff_length= alloc_length;
18041     if (!(table->record[0]= (uchar*)
18042                             alloc_root(&table->mem_root, alloc_length*3)))
18043       goto err;
18044     table->record[1]= table->record[0]+alloc_length;
18045     share->default_values= table->record[1]+alloc_length;
18046   }
18047   copy_func[0]=0;				// End marker
18048   param->func_count= (uint)(copy_func - param->items_to_copy);
18049 
18050   setup_tmp_table_column_bitmaps(table, bitmaps);
18051 
18052   recinfo=param->start_recinfo;
18053   null_flags=(uchar*) table->record[0];
18054   pos=table->record[0]+ null_pack_length;
18055   if (null_pack_length)
18056   {
18057     bzero((uchar*) recinfo,sizeof(*recinfo));
18058     recinfo->type=FIELD_NORMAL;
18059     recinfo->length=null_pack_length;
18060     recinfo++;
18061     bfill(null_flags,null_pack_length,255);	// Set null fields
18062 
18063     table->null_flags= (uchar*) table->record[0];
18064     share->null_fields= null_count+ hidden_null_count;
18065     share->null_bytes= share->null_bytes_for_compare= null_pack_length;
18066   }
18067   null_count= (blob_count == 0) ? 1 : 0;
18068   hidden_field_count=param->hidden_field_count;
18069 
18070   /* Protect against warnings in field_conv() in the next loop*/
18071   save_abort_on_warning= thd->abort_on_warning;
18072   thd->abort_on_warning= 0;
18073 
18074   for (i=0,reg_field=table->field; i < field_count; i++,reg_field++,recinfo++)
18075   {
18076     Field *field= *reg_field;
18077     uint length;
18078     bzero((uchar*) recinfo,sizeof(*recinfo));
18079 
18080     if (!(field->flags & NOT_NULL_FLAG))
18081     {
18082       recinfo->null_bit= (uint8)1 << (null_count & 7);
18083       recinfo->null_pos= null_count/8;
18084       field->move_field(pos,null_flags+null_count/8,
18085 			(uint8)1 << (null_count & 7));
18086       null_count++;
18087     }
18088     else
18089       field->move_field(pos,(uchar*) 0,0);
18090     if (field->type() == MYSQL_TYPE_BIT)
18091     {
18092       /* We have to reserve place for extra bits among null bits */
18093       ((Field_bit*) field)->set_bit_ptr(null_flags + null_count / 8,
18094                                         null_count & 7);
18095       null_count+= (field->field_length & 7);
18096     }
18097     field->reset();
18098 
18099     /*
18100       Test if there is a default field value. The test for ->ptr is to skip
18101       'offset' fields generated by initialize_tables
18102     */
18103     if (default_field[i] && default_field[i]->ptr)
18104     {
18105       /*
18106          default_field[i] is set only in the cases  when 'field' can
18107          inherit the default value that is defined for the field referred
18108          by the Item_field object from which 'field' has been created.
18109       */
18110       Field *orig_field= default_field[i];
18111       /* Get the value from default_values */
18112       if (orig_field->is_null_in_record(orig_field->table->s->default_values))
18113         field->set_null();
18114       else
18115       {
18116         /*
18117           Copy default value. We have to use field_conv() for copy, instead of
18118           memcpy(), because bit_fields may be stored differently
18119         */
18120         my_ptrdiff_t ptr_diff= (orig_field->table->s->default_values -
18121                                 orig_field->table->record[0]);
18122         field->set_notnull();
18123         orig_field->move_field_offset(ptr_diff);
18124         field_conv(field, orig_field);
18125         orig_field->move_field_offset(-ptr_diff);
18126       }
18127     }
18128 
18129     if (from_field[i])
18130     {						/* Not a table Item */
18131       copy->set(field,from_field[i],save_sum_fields);
18132       copy++;
18133     }
18134     length=field->pack_length_in_rec();
18135     pos+= length;
18136 
18137     /* Make entry for create table */
18138     recinfo->length=length;
18139     if (field->flags & BLOB_FLAG)
18140       recinfo->type= FIELD_BLOB;
18141     else if (use_packed_rows &&
18142              field->real_type() == MYSQL_TYPE_STRING &&
18143 	     length >= MIN_STRING_LENGTH_TO_PACK_ROWS)
18144       recinfo->type= FIELD_SKIP_ENDSPACE;
18145     else if (field->real_type() == MYSQL_TYPE_VARCHAR)
18146       recinfo->type= FIELD_VARCHAR;
18147     else
18148       recinfo->type= FIELD_NORMAL;
18149 
18150     if (!--hidden_field_count)
18151       null_count=(null_count+7) & ~7;		// move to next byte
18152 
18153     // fix table name in field entry
18154     field->set_table_name(&table->alias);
18155   }
18156   /* Handle group_null_items */
18157   bzero(pos, table->s->reclength - (pos - table->record[0]));
18158   MEM_CHECK_DEFINED(table->record[0], table->s->reclength);
18159 
18160   thd->abort_on_warning= save_abort_on_warning;
18161   param->copy_field_end=copy;
18162   param->recinfo= recinfo;              	// Pointer to after last field
18163   store_record(table,s->default_values);        // Make empty default record
18164 
18165   if (thd->variables.tmp_memory_table_size == ~ (ulonglong) 0)	// No limit
18166     share->max_rows= ~(ha_rows) 0;
18167   else
18168     share->max_rows= (ha_rows) (((share->db_type() == heap_hton) ?
18169                                  MY_MIN(thd->variables.tmp_memory_table_size,
18170                                      thd->variables.max_heap_table_size) :
18171                                  thd->variables.tmp_memory_table_size) /
18172                                 share->reclength);
18173   set_if_bigger(share->max_rows,1);		// For dummy start options
18174   /*
18175     Push the LIMIT clause to the temporary table creation, so that we
18176     materialize only up to 'rows_limit' records instead of all result records.
18177   */
18178   set_if_smaller(share->max_rows, rows_limit);
18179   param->end_write_records= rows_limit;
18180 
18181   keyinfo= param->keyinfo;
18182 
18183   if (group)
18184   {
18185     DBUG_PRINT("info",("Creating group key in temporary table"));
18186     table->group=group;				/* Table is grouped by key */
18187     param->group_buff=group_buff;
18188     share->keys=1;
18189     share->uniques= MY_TEST(using_unique_constraint);
18190     table->key_info= table->s->key_info= keyinfo;
18191     table->keys_in_use_for_query.set_bit(0);
18192     share->keys_in_use.set_bit(0);
18193     keyinfo->key_part=key_part_info;
18194     keyinfo->flags=HA_NOSAME | HA_BINARY_PACK_KEY | HA_PACK_KEY;
18195     keyinfo->ext_key_flags= keyinfo->flags;
18196     keyinfo->usable_key_parts=keyinfo->user_defined_key_parts= param->group_parts;
18197     keyinfo->ext_key_parts= keyinfo->user_defined_key_parts;
18198     keyinfo->key_length=0;
18199     keyinfo->rec_per_key=NULL;
18200     keyinfo->read_stats= NULL;
18201     keyinfo->collected_stats= NULL;
18202     keyinfo->algorithm= HA_KEY_ALG_UNDEF;
18203     keyinfo->is_statistics_from_stat_tables= FALSE;
18204     keyinfo->name= group_key;
18205     ORDER *cur_group= group;
18206     for (; cur_group ; cur_group= cur_group->next, key_part_info++)
18207     {
18208       Field *field=(*cur_group->item)->get_tmp_table_field();
18209       DBUG_ASSERT(field->table == table);
18210       bool maybe_null=(*cur_group->item)->maybe_null;
18211       key_part_info->null_bit=0;
18212       key_part_info->field=  field;
18213       key_part_info->fieldnr= field->field_index + 1;
18214       if (cur_group == group)
18215         field->key_start.set_bit(0);
18216       key_part_info->offset= field->offset(table->record[0]);
18217       key_part_info->length= (uint16) field->key_length();
18218       key_part_info->type=   (uint8) field->key_type();
18219       key_part_info->key_type =
18220 	((ha_base_keytype) key_part_info->type == HA_KEYTYPE_TEXT ||
18221 	 (ha_base_keytype) key_part_info->type == HA_KEYTYPE_VARTEXT1 ||
18222 	 (ha_base_keytype) key_part_info->type == HA_KEYTYPE_VARTEXT2) ?
18223 	0 : FIELDFLAG_BINARY;
18224       key_part_info->key_part_flag= 0;
18225       if (!using_unique_constraint)
18226       {
18227 	cur_group->buff=(char*) group_buff;
18228 
18229         if (maybe_null && !field->null_bit)
18230         {
18231           /*
18232             This can only happen in the unusual case where an outer join
18233             table was found to be not-nullable by the optimizer and we
18234             the item can't really be null.
18235             We solve this by marking the item as !maybe_null to ensure
18236             that the key,field and item definition match.
18237           */
18238           (*cur_group->item)->maybe_null= maybe_null= 0;
18239         }
18240 
18241 	if (!(cur_group->field= field->new_key_field(thd->mem_root,table,
18242                                                      group_buff +
18243                                                      MY_TEST(maybe_null),
18244                                                      key_part_info->length,
18245                                                      field->null_ptr,
18246                                                      field->null_bit)))
18247 	  goto err; /* purecov: inspected */
18248 
18249 	if (maybe_null)
18250 	{
18251 	  /*
18252 	    To be able to group on NULL, we reserved place in group_buff
18253 	    for the NULL flag just before the column. (see above).
18254 	    The field data is after this flag.
18255 	    The NULL flag is updated in 'end_update()' and 'end_write()'
18256 	  */
18257 	  keyinfo->flags|= HA_NULL_ARE_EQUAL;	// def. that NULL == NULL
18258 	  key_part_info->null_bit=field->null_bit;
18259 	  key_part_info->null_offset= (uint) (field->null_ptr -
18260 					      (uchar*) table->record[0]);
18261           cur_group->buff++;                        // Pointer to field data
18262 	  group_buff++;                         // Skipp null flag
18263 	}
18264 	group_buff+= cur_group->field->pack_length();
18265       }
18266       keyinfo->key_length+=  key_part_info->length;
18267     }
18268     /*
18269       Ensure we didn't overrun the group buffer. The < is only true when
18270       some maybe_null fields was changed to be not null fields.
18271     */
18272     DBUG_ASSERT(using_unique_constraint ||
18273                 group_buff <= param->group_buff + param->group_length);
18274   }
18275 
18276   if (distinct && field_count != param->hidden_field_count)
18277   {
18278     /*
18279       Create an unique key or an unique constraint over all columns
18280       that should be in the result.  In the temporary table, there are
18281       'param->hidden_field_count' extra columns, whose null bits are stored
18282       in the first 'hidden_null_pack_length' bytes of the row.
18283     */
18284     DBUG_PRINT("info",("hidden_field_count: %d", param->hidden_field_count));
18285 
18286     if (blob_count)
18287     {
18288       /*
18289         Special mode for index creation in MyISAM used to support unique
18290         indexes on blobs with arbitrary length. Such indexes cannot be
18291         used for lookups.
18292       */
18293       share->uniques= 1;
18294     }
18295     null_pack_length-=hidden_null_pack_length;
18296     keyinfo->user_defined_key_parts=
18297       ((field_count-param->hidden_field_count)+
18298        (share->uniques ? MY_TEST(null_pack_length) : 0));
18299     keyinfo->ext_key_parts= keyinfo->user_defined_key_parts;
18300     keyinfo->usable_key_parts= keyinfo->user_defined_key_parts;
18301     table->distinct= 1;
18302     share->keys= 1;
18303     if (!(key_part_info= (KEY_PART_INFO*)
18304           alloc_root(&table->mem_root,
18305                      keyinfo->user_defined_key_parts * sizeof(KEY_PART_INFO))))
18306       goto err;
18307     bzero((void*) key_part_info, keyinfo->user_defined_key_parts * sizeof(KEY_PART_INFO));
18308     table->keys_in_use_for_query.set_bit(0);
18309     share->keys_in_use.set_bit(0);
18310     table->key_info= table->s->key_info= keyinfo;
18311     keyinfo->key_part=key_part_info;
18312     keyinfo->flags=HA_NOSAME | HA_NULL_ARE_EQUAL | HA_BINARY_PACK_KEY | HA_PACK_KEY;
18313     keyinfo->ext_key_flags= keyinfo->flags;
18314     keyinfo->key_length= 0;  // Will compute the sum of the parts below.
18315     keyinfo->name= distinct_key;
18316     keyinfo->algorithm= HA_KEY_ALG_UNDEF;
18317     keyinfo->is_statistics_from_stat_tables= FALSE;
18318     keyinfo->read_stats= NULL;
18319     keyinfo->collected_stats= NULL;
18320 
18321     /*
18322       Needed by non-merged semi-joins: SJ-Materialized table must have a valid
18323       rec_per_key array, because it participates in join optimization. Since
18324       the table has no data, the only statistics we can provide is "unknown",
18325       i.e. zero values.
18326 
18327       (For table record count, we calculate and set JOIN_TAB::found_records,
18328        see get_delayed_table_estimates()).
18329     */
18330     size_t rpk_size= keyinfo->user_defined_key_parts * sizeof(keyinfo->rec_per_key[0]);
18331     if (!(keyinfo->rec_per_key= (ulong*) alloc_root(&table->mem_root,
18332                                                     rpk_size)))
18333       goto err;
18334     bzero(keyinfo->rec_per_key, rpk_size);
18335 
18336     /*
18337       Create an extra field to hold NULL bits so that unique indexes on
18338       blobs can distinguish NULL from 0. This extra field is not needed
18339       when we do not use UNIQUE indexes for blobs.
18340     */
18341     if (null_pack_length && share->uniques)
18342     {
18343       key_part_info->null_bit=0;
18344       key_part_info->offset=hidden_null_pack_length;
18345       key_part_info->length=null_pack_length;
18346       key_part_info->field= new Field_string(table->record[0],
18347                                              (uint32) key_part_info->length,
18348                                              (uchar*) 0,
18349                                              (uint) 0,
18350                                              Field::NONE,
18351                                              &null_clex_str, &my_charset_bin);
18352       if (!key_part_info->field)
18353         goto err;
18354       key_part_info->field->init(table);
18355       key_part_info->key_type=FIELDFLAG_BINARY;
18356       key_part_info->type=    HA_KEYTYPE_BINARY;
18357       key_part_info->fieldnr= key_part_info->field->field_index + 1;
18358       key_part_info++;
18359     }
18360     /* Create a distinct key over the columns we are going to return */
18361     for (i=param->hidden_field_count, reg_field=table->field + i ;
18362 	 i < field_count;
18363 	 i++, reg_field++, key_part_info++)
18364     {
18365       key_part_info->field=    *reg_field;
18366       (*reg_field)->flags |= PART_KEY_FLAG;
18367       if (key_part_info == keyinfo->key_part)
18368         (*reg_field)->key_start.set_bit(0);
18369       key_part_info->null_bit= (*reg_field)->null_bit;
18370       key_part_info->null_offset= (uint) ((*reg_field)->null_ptr -
18371                                           (uchar*) table->record[0]);
18372 
18373       key_part_info->offset=   (*reg_field)->offset(table->record[0]);
18374       key_part_info->length=   (uint16) (*reg_field)->pack_length();
18375       key_part_info->fieldnr= (*reg_field)->field_index + 1;
18376       /* TODO:
18377         The below method of computing the key format length of the
18378         key part is a copy/paste from opt_range.cc, and table.cc.
18379         This should be factored out, e.g. as a method of Field.
18380         In addition it is not clear if any of the Field::*_length
18381         methods is supposed to compute the same length. If so, it
18382         might be reused.
18383       */
18384       key_part_info->store_length= key_part_info->length;
18385 
18386       if ((*reg_field)->real_maybe_null())
18387       {
18388         key_part_info->store_length+= HA_KEY_NULL_LENGTH;
18389         key_part_info->key_part_flag |= HA_NULL_PART;
18390       }
18391       if ((*reg_field)->type() == MYSQL_TYPE_BLOB ||
18392           (*reg_field)->real_type() == MYSQL_TYPE_VARCHAR ||
18393           (*reg_field)->type() == MYSQL_TYPE_GEOMETRY)
18394       {
18395         if ((*reg_field)->type() == MYSQL_TYPE_BLOB ||
18396             (*reg_field)->type() == MYSQL_TYPE_GEOMETRY)
18397           key_part_info->key_part_flag|= HA_BLOB_PART;
18398         else
18399           key_part_info->key_part_flag|= HA_VAR_LENGTH_PART;
18400 
18401         key_part_info->store_length+=HA_KEY_BLOB_LENGTH;
18402       }
18403 
18404       keyinfo->key_length+= key_part_info->store_length;
18405 
18406       key_part_info->type=     (uint8) (*reg_field)->key_type();
18407       key_part_info->key_type =
18408 	((ha_base_keytype) key_part_info->type == HA_KEYTYPE_TEXT ||
18409 	 (ha_base_keytype) key_part_info->type == HA_KEYTYPE_VARTEXT1 ||
18410 	 (ha_base_keytype) key_part_info->type == HA_KEYTYPE_VARTEXT2) ?
18411 	0 : FIELDFLAG_BINARY;
18412     }
18413   }
18414 
18415   if (unlikely(thd->is_fatal_error))             // If end of memory
18416     goto err;					 /* purecov: inspected */
18417   share->db_record_offset= 1;
18418   table->used_for_duplicate_elimination= (param->sum_func_count == 0 &&
18419                                           (table->group || table->distinct));
18420   table->keep_row_order= keep_row_order;
18421 
18422   if (!do_not_open)
18423   {
18424     if (instantiate_tmp_table(table, param->keyinfo, param->start_recinfo,
18425                               &param->recinfo, select_options))
18426       goto err;
18427   }
18428 
18429   /* record[0] and share->default_values should now have been set up */
18430   MEM_CHECK_DEFINED(table->record[0], table->s->reclength);
18431   MEM_CHECK_DEFINED(share->default_values, table->s->reclength);
18432 
18433   empty_record(table);
18434   table->status= STATUS_NO_RECORD;
18435   thd->mem_root= mem_root_save;
18436 
18437   DBUG_RETURN(table);
18438 
18439 err:
18440   thd->mem_root= mem_root_save;
18441   free_tmp_table(thd,table);                    /* purecov: inspected */
18442   if (temp_pool_slot != MY_BIT_NONE)
18443     bitmap_lock_clear_bit(&temp_pool, temp_pool_slot);
18444   DBUG_RETURN(NULL);				/* purecov: inspected */
18445 }
18446 
18447 
18448 /****************************************************************************/
18449 
operator new(size_t size,THD * thd)18450 void *Virtual_tmp_table::operator new(size_t size, THD *thd) throw()
18451 {
18452   return (Virtual_tmp_table *) alloc_root(thd->mem_root, size);
18453 }
18454 
18455 
init(uint field_count)18456 bool Virtual_tmp_table::init(uint field_count)
18457 {
18458   uint *blob_field;
18459   uchar *bitmaps;
18460   DBUG_ENTER("Virtual_tmp_table::init");
18461   if (!multi_alloc_root(in_use->mem_root,
18462                         &s, sizeof(*s),
18463                         &field, (field_count + 1) * sizeof(Field*),
18464                         &blob_field, (field_count + 1) * sizeof(uint),
18465                         &bitmaps, bitmap_buffer_size(field_count) * 6,
18466                         NullS))
18467     DBUG_RETURN(true);
18468   s->reset();
18469   s->blob_field= blob_field;
18470   setup_tmp_table_column_bitmaps(this, bitmaps, field_count);
18471   m_alloced_field_count= field_count;
18472   DBUG_RETURN(false);
18473 };
18474 
18475 
add(List<Spvar_definition> & field_list)18476 bool Virtual_tmp_table::add(List<Spvar_definition> &field_list)
18477 {
18478   /* Create all fields and calculate the total length of record */
18479   Spvar_definition *cdef;            /* column definition */
18480   List_iterator_fast<Spvar_definition> it(field_list);
18481   DBUG_ENTER("Virtual_tmp_table::add");
18482   while ((cdef= it++))
18483   {
18484     Field *tmp;
18485     if (!(tmp= cdef->make_field(s, in_use->mem_root, 0,
18486                              (uchar*) (f_maybe_null(cdef->pack_flag) ? "" : 0),
18487                              f_maybe_null(cdef->pack_flag) ? 1 : 0,
18488                              &cdef->field_name)))
18489       DBUG_RETURN(true);
18490      add(tmp);
18491   }
18492   DBUG_RETURN(false);
18493 }
18494 
18495 
setup_field_pointers()18496 void Virtual_tmp_table::setup_field_pointers()
18497 {
18498   uchar *null_pos= record[0];
18499   uchar *field_pos= null_pos + s->null_bytes;
18500   uint null_bit= 1;
18501 
18502   for (Field **cur_ptr= field; *cur_ptr; ++cur_ptr)
18503   {
18504     Field *cur_field= *cur_ptr;
18505     if ((cur_field->flags & NOT_NULL_FLAG))
18506       cur_field->move_field(field_pos);
18507     else
18508     {
18509       cur_field->move_field(field_pos, (uchar*) null_pos, null_bit);
18510       null_bit<<= 1;
18511       if (null_bit == (uint)1 << 8)
18512       {
18513         ++null_pos;
18514         null_bit= 1;
18515       }
18516     }
18517     if (cur_field->type() == MYSQL_TYPE_BIT &&
18518         cur_field->key_type() == HA_KEYTYPE_BIT)
18519     {
18520       /* This is a Field_bit since key_type is HA_KEYTYPE_BIT */
18521       static_cast<Field_bit*>(cur_field)->set_bit_ptr(null_pos, null_bit);
18522       null_bit+= cur_field->field_length & 7;
18523       if (null_bit > 7)
18524       {
18525         null_pos++;
18526         null_bit-= 8;
18527       }
18528     }
18529     cur_field->reset();
18530     field_pos+= cur_field->pack_length();
18531   }
18532 }
18533 
18534 
open()18535 bool Virtual_tmp_table::open()
18536 {
18537   // Make sure that we added all the fields we planned to:
18538   DBUG_ASSERT(s->fields == m_alloced_field_count);
18539   field[s->fields]= NULL;            // mark the end of the list
18540   s->blob_field[s->blob_fields]= 0;  // mark the end of the list
18541 
18542   uint null_pack_length= (s->null_fields + 7) / 8; // NULL-bit array length
18543   s->reclength+= null_pack_length;
18544   s->rec_buff_length= ALIGN_SIZE(s->reclength + 1);
18545   if (!(record[0]= (uchar*) in_use->alloc(s->rec_buff_length)))
18546     return true;
18547   if (null_pack_length)
18548   {
18549     null_flags= (uchar*) record[0];
18550     s->null_bytes= s->null_bytes_for_compare= null_pack_length;
18551   }
18552   setup_field_pointers();
18553   return false;
18554 }
18555 
18556 
sp_find_field_by_name(uint * idx,const LEX_CSTRING & name) const18557 bool Virtual_tmp_table::sp_find_field_by_name(uint *idx,
18558                                               const LEX_CSTRING &name) const
18559 {
18560   Field *f;
18561   for (uint i= 0; (f= field[i]); i++)
18562   {
18563     // Use the same comparison style with sp_context::find_variable()
18564     if (!my_strnncoll(system_charset_info,
18565                       (const uchar *) f->field_name.str,
18566                       f->field_name.length,
18567                       (const uchar *) name.str, name.length))
18568     {
18569       *idx= i;
18570       return false;
18571     }
18572   }
18573   return true;
18574 }
18575 
18576 
18577 bool
sp_find_field_by_name_or_error(uint * idx,const LEX_CSTRING & var_name,const LEX_CSTRING & field_name) const18578 Virtual_tmp_table::sp_find_field_by_name_or_error(uint *idx,
18579                                                   const LEX_CSTRING &var_name,
18580                                                   const LEX_CSTRING &field_name)
18581                                                   const
18582 {
18583   if (sp_find_field_by_name(idx, field_name))
18584   {
18585     my_error(ER_ROW_VARIABLE_DOES_NOT_HAVE_FIELD, MYF(0),
18586              var_name.str, field_name.str);
18587     return true;
18588   }
18589   return false;
18590 }
18591 
18592 
sp_set_all_fields_from_item_list(THD * thd,List<Item> & items)18593 bool Virtual_tmp_table::sp_set_all_fields_from_item_list(THD *thd,
18594                                                          List<Item> &items)
18595 {
18596   DBUG_ASSERT(s->fields == items.elements);
18597   List_iterator<Item> it(items);
18598   Item *item;
18599   for (uint i= 0 ; (item= it++) ; i++)
18600   {
18601     if (field[i]->sp_prepare_and_store_item(thd, &item))
18602       return true;
18603   }
18604   return false;
18605 }
18606 
18607 
sp_set_all_fields_from_item(THD * thd,Item * value)18608 bool Virtual_tmp_table::sp_set_all_fields_from_item(THD *thd, Item *value)
18609 {
18610   DBUG_ASSERT(value->fixed);
18611   DBUG_ASSERT(value->cols() == s->fields);
18612   for (uint i= 0; i < value->cols(); i++)
18613   {
18614     if (field[i]->sp_prepare_and_store_item(thd, value->addr(i)))
18615       return true;
18616   }
18617   return false;
18618 }
18619 
18620 
open_tmp_table(TABLE * table)18621 bool open_tmp_table(TABLE *table)
18622 {
18623   int error;
18624   if (unlikely((error= table->file->ha_open(table, table->s->path.str, O_RDWR,
18625                                             HA_OPEN_TMP_TABLE |
18626                                             HA_OPEN_INTERNAL_TABLE))))
18627   {
18628     table->file->print_error(error, MYF(0)); /* purecov: inspected */
18629     table->db_stat= 0;
18630     return 1;
18631   }
18632   table->db_stat= HA_OPEN_KEYFILE;
18633   (void) table->file->extra(HA_EXTRA_QUICK); /* Faster */
18634   if (!table->is_created())
18635   {
18636     table->set_created();
18637     table->in_use->inc_status_created_tmp_tables();
18638   }
18639 
18640   return 0;
18641 }
18642 
18643 
18644 #ifdef USE_ARIA_FOR_TMP_TABLES
18645 /*
18646   Create internal (MyISAM or Maria) temporary table
18647 
18648   SYNOPSIS
18649     create_internal_tmp_table()
18650       table           Table object that descrimes the table to be created
18651       keyinfo         Description of the index (there is always one index)
18652       start_recinfo   engine's column descriptions
18653       recinfo INOUT   End of engine's column descriptions
18654       options         Option bits
18655 
18656   DESCRIPTION
18657     Create an internal emporary table according to passed description. The is
18658     assumed to have one unique index or constraint.
18659 
18660     The passed array or TMP_ENGINE_COLUMNDEF structures must have this form:
18661 
18662       1. 1-byte column (afaiu for 'deleted' flag) (note maybe not 1-byte
18663          when there are many nullable columns)
18664       2. Table columns
18665       3. One free TMP_ENGINE_COLUMNDEF element (*recinfo points here)
18666 
18667     This function may use the free element to create hash column for unique
18668     constraint.
18669 
18670    RETURN
18671      FALSE - OK
18672      TRUE  - Error
18673 */
18674 
18675 
create_internal_tmp_table(TABLE * table,KEY * keyinfo,TMP_ENGINE_COLUMNDEF * start_recinfo,TMP_ENGINE_COLUMNDEF ** recinfo,ulonglong options)18676 bool create_internal_tmp_table(TABLE *table, KEY *keyinfo,
18677                                TMP_ENGINE_COLUMNDEF *start_recinfo,
18678                                TMP_ENGINE_COLUMNDEF **recinfo,
18679                                ulonglong options)
18680 {
18681   int error;
18682   MARIA_KEYDEF keydef;
18683   MARIA_UNIQUEDEF uniquedef;
18684   TABLE_SHARE *share= table->s;
18685   MARIA_CREATE_INFO create_info;
18686   DBUG_ENTER("create_internal_tmp_table");
18687 
18688   if (share->keys)
18689   {						// Get keys for ni_create
18690     bool using_unique_constraint=0;
18691     HA_KEYSEG *seg= (HA_KEYSEG*) alloc_root(&table->mem_root,
18692                                             sizeof(*seg) * keyinfo->user_defined_key_parts);
18693     if (!seg)
18694       goto err;
18695 
18696     bzero(seg, sizeof(*seg) * keyinfo->user_defined_key_parts);
18697     /*
18698        Note that a similar check is performed during
18699        subquery_types_allow_materialization. See MDEV-7122 for more details as
18700        to why. Whenever this changes, it must be updated there as well, for
18701        all tmp_table engines.
18702     */
18703     if (keyinfo->key_length > table->file->max_key_length() ||
18704 	keyinfo->user_defined_key_parts > table->file->max_key_parts() ||
18705 	share->uniques)
18706     {
18707       if (!share->uniques && !(keyinfo->flags & HA_NOSAME))
18708       {
18709         my_error(ER_INTERNAL_ERROR, MYF(0),
18710                  "Using too big key for internal temp tables");
18711         DBUG_RETURN(1);
18712       }
18713 
18714       /* Can't create a key; Make a unique constraint instead of a key */
18715       share->keys=    0;
18716       share->uniques= 1;
18717       using_unique_constraint=1;
18718       bzero((char*) &uniquedef,sizeof(uniquedef));
18719       uniquedef.keysegs=keyinfo->user_defined_key_parts;
18720       uniquedef.seg=seg;
18721       uniquedef.null_are_equal=1;
18722 
18723       /* Create extra column for hash value */
18724       bzero((uchar*) *recinfo,sizeof(**recinfo));
18725       (*recinfo)->type=   FIELD_CHECK;
18726       (*recinfo)->length= MARIA_UNIQUE_HASH_LENGTH;
18727       (*recinfo)++;
18728 
18729       /* Avoid warnings from valgrind */
18730       bzero(table->record[0]+ share->reclength, MARIA_UNIQUE_HASH_LENGTH);
18731       bzero(share->default_values+ share->reclength, MARIA_UNIQUE_HASH_LENGTH);
18732       share->reclength+= MARIA_UNIQUE_HASH_LENGTH;
18733     }
18734     else
18735     {
18736       /* Create a key */
18737       bzero((char*) &keydef,sizeof(keydef));
18738       keydef.flag= keyinfo->flags & HA_NOSAME;
18739       keydef.keysegs=  keyinfo->user_defined_key_parts;
18740       keydef.seg= seg;
18741     }
18742     for (uint i=0; i < keyinfo->user_defined_key_parts ; i++,seg++)
18743     {
18744       Field *field=keyinfo->key_part[i].field;
18745       seg->flag=     0;
18746       seg->language= field->charset()->number;
18747       seg->length=   keyinfo->key_part[i].length;
18748       seg->start=    keyinfo->key_part[i].offset;
18749       if (field->flags & BLOB_FLAG)
18750       {
18751 	seg->type=
18752 	((keyinfo->key_part[i].key_type & FIELDFLAG_BINARY) ?
18753 	 HA_KEYTYPE_VARBINARY2 : HA_KEYTYPE_VARTEXT2);
18754 	seg->bit_start= (uint8)(field->pack_length() -
18755                                 portable_sizeof_char_ptr);
18756 	seg->flag= HA_BLOB_PART;
18757 	seg->length=0;			// Whole blob in unique constraint
18758       }
18759       else
18760       {
18761 	seg->type= keyinfo->key_part[i].type;
18762         /* Tell handler if it can do suffic space compression */
18763 	if (field->real_type() == MYSQL_TYPE_STRING &&
18764 	    keyinfo->key_part[i].length > 32)
18765 	  seg->flag|= HA_SPACE_PACK;
18766       }
18767       if (!(field->flags & NOT_NULL_FLAG))
18768       {
18769 	seg->null_bit= field->null_bit;
18770 	seg->null_pos= (uint) (field->null_ptr - (uchar*) table->record[0]);
18771 	/*
18772 	  We are using a GROUP BY on something that contains NULL
18773 	  In this case we have to tell Aria that two NULL should
18774 	  on INSERT be regarded at the same value
18775 	*/
18776 	if (!using_unique_constraint)
18777 	  keydef.flag|= HA_NULL_ARE_EQUAL;
18778       }
18779     }
18780   }
18781   bzero((char*) &create_info,sizeof(create_info));
18782   create_info.data_file_length= table->in_use->variables.tmp_disk_table_size;
18783 
18784   /*
18785     The logic for choosing the record format:
18786     The STATIC_RECORD format is the fastest one, because it's so simple,
18787     so we use this by default for short rows.
18788     BLOCK_RECORD caches both row and data, so this is generally faster than
18789     DYNAMIC_RECORD. The one exception is when we write to tmp table and
18790     want to use keys for duplicate elimination as with BLOCK RECORD
18791     we first write the row, then check for key conflicts and then we have to
18792     delete the row.  The cases when this can happen is when there is
18793     a group by and no sum functions or if distinct is used.
18794   */
18795   {
18796     enum data_file_type file_type= table->no_rows ? NO_RECORD :
18797         (share->reclength < 64 && !share->blob_fields ? STATIC_RECORD :
18798          table->used_for_duplicate_elimination ? DYNAMIC_RECORD : BLOCK_RECORD);
18799     uint create_flags= HA_CREATE_TMP_TABLE | HA_CREATE_INTERNAL_TABLE |
18800         (table->keep_row_order ? HA_PRESERVE_INSERT_ORDER : 0);
18801 
18802     if (file_type != NO_RECORD && encrypt_tmp_disk_tables)
18803     {
18804       /* encryption is only supported for BLOCK_RECORD */
18805       file_type= BLOCK_RECORD;
18806       if (table->used_for_duplicate_elimination)
18807       {
18808         /*
18809           sql-layer expect the last column to be stored/restored also
18810           when it's null.
18811 
18812           This is probably a bug (that sql-layer doesn't annotate
18813           the column as not-null) but both heap, aria-static, aria-dynamic and
18814           myisam has this property. aria-block_record does not since it
18815           does not store null-columns at all.
18816           Emulate behaviour by making column not-nullable when creating the
18817           table.
18818         */
18819         uint cols= (uint)(*recinfo-start_recinfo);
18820         start_recinfo[cols-1].null_bit= 0;
18821       }
18822     }
18823 
18824     if (unlikely((error= maria_create(share->path.str, file_type, share->keys,
18825                                       &keydef, (uint) (*recinfo-start_recinfo),
18826                                       start_recinfo, share->uniques, &uniquedef,
18827                                       &create_info, create_flags))))
18828     {
18829       table->file->print_error(error,MYF(0));	/* purecov: inspected */
18830       table->db_stat=0;
18831       goto err;
18832     }
18833   }
18834 
18835   table->in_use->inc_status_created_tmp_disk_tables();
18836   table->in_use->inc_status_created_tmp_tables();
18837   share->db_record_offset= 1;
18838   table->set_created();
18839   DBUG_RETURN(0);
18840  err:
18841   DBUG_RETURN(1);
18842 }
18843 
18844 #else
18845 
18846 /*
18847   Create internal (MyISAM or Maria) temporary table
18848 
18849   SYNOPSIS
18850     create_internal_tmp_table()
18851       table           Table object that descrimes the table to be created
18852       keyinfo         Description of the index (there is always one index)
18853       start_recinfo   engine's column descriptions
18854       recinfo INOUT   End of engine's column descriptions
18855       options         Option bits
18856 
18857   DESCRIPTION
18858     Create an internal emporary table according to passed description. The is
18859     assumed to have one unique index or constraint.
18860 
18861     The passed array or TMP_ENGINE_COLUMNDEF structures must have this form:
18862 
18863       1. 1-byte column (afaiu for 'deleted' flag) (note maybe not 1-byte
18864          when there are many nullable columns)
18865       2. Table columns
18866       3. One free TMP_ENGINE_COLUMNDEF element (*recinfo points here)
18867 
18868     This function may use the free element to create hash column for unique
18869     constraint.
18870 
18871    RETURN
18872      FALSE - OK
18873      TRUE  - Error
18874 */
18875 
18876 /* Create internal MyISAM temporary table */
18877 
create_internal_tmp_table(TABLE * table,KEY * keyinfo,TMP_ENGINE_COLUMNDEF * start_recinfo,TMP_ENGINE_COLUMNDEF ** recinfo,ulonglong options)18878 bool create_internal_tmp_table(TABLE *table, KEY *keyinfo,
18879                                TMP_ENGINE_COLUMNDEF *start_recinfo,
18880                                TMP_ENGINE_COLUMNDEF **recinfo,
18881                                ulonglong options)
18882 {
18883   int error;
18884   MI_KEYDEF keydef;
18885   MI_UNIQUEDEF uniquedef;
18886   TABLE_SHARE *share= table->s;
18887   DBUG_ENTER("create_internal_tmp_table");
18888 
18889   if (share->keys)
18890   {						// Get keys for ni_create
18891     bool using_unique_constraint=0;
18892     HA_KEYSEG *seg= (HA_KEYSEG*) alloc_root(&table->mem_root,
18893                                             sizeof(*seg) * keyinfo->user_defined_key_parts);
18894     if (!seg)
18895       goto err;
18896 
18897     bzero(seg, sizeof(*seg) * keyinfo->user_defined_key_parts);
18898     /*
18899        Note that a similar check is performed during
18900        subquery_types_allow_materialization. See MDEV-7122 for more details as
18901        to why. Whenever this changes, it must be updated there as well, for
18902        all tmp_table engines.
18903     */
18904     if (keyinfo->key_length > table->file->max_key_length() ||
18905 	keyinfo->user_defined_key_parts > table->file->max_key_parts() ||
18906 	share->uniques)
18907     {
18908       /* Can't create a key; Make a unique constraint instead of a key */
18909       share->keys=    0;
18910       share->uniques= 1;
18911       using_unique_constraint=1;
18912       bzero((char*) &uniquedef,sizeof(uniquedef));
18913       uniquedef.keysegs=keyinfo->user_defined_key_parts;
18914       uniquedef.seg=seg;
18915       uniquedef.null_are_equal=1;
18916 
18917       /* Create extra column for hash value */
18918       bzero((uchar*) *recinfo,sizeof(**recinfo));
18919       (*recinfo)->type= FIELD_CHECK;
18920       (*recinfo)->length=MI_UNIQUE_HASH_LENGTH;
18921       (*recinfo)++;
18922       /* Avoid warnings from valgrind */
18923       bzero(table->record[0]+ share->reclength, MI_UNIQUE_HASH_LENGTH);
18924       bzero(share->default_values+ share->reclength, MI_UNIQUE_HASH_LENGTH);
18925       share->reclength+= MI_UNIQUE_HASH_LENGTH;
18926     }
18927     else
18928     {
18929       /* Create an unique key */
18930       bzero((char*) &keydef,sizeof(keydef));
18931       keydef.flag= ((keyinfo->flags & HA_NOSAME) | HA_BINARY_PACK_KEY |
18932                     HA_PACK_KEY);
18933       keydef.keysegs=  keyinfo->user_defined_key_parts;
18934       keydef.seg= seg;
18935     }
18936     for (uint i=0; i < keyinfo->user_defined_key_parts ; i++,seg++)
18937     {
18938       Field *field=keyinfo->key_part[i].field;
18939       seg->flag=     0;
18940       seg->language= field->charset()->number;
18941       seg->length=   keyinfo->key_part[i].length;
18942       seg->start=    keyinfo->key_part[i].offset;
18943       if (field->flags & BLOB_FLAG)
18944       {
18945 	seg->type=
18946 	((keyinfo->key_part[i].key_type & FIELDFLAG_BINARY) ?
18947 	 HA_KEYTYPE_VARBINARY2 : HA_KEYTYPE_VARTEXT2);
18948 	seg->bit_start= (uint8)(field->pack_length() - portable_sizeof_char_ptr);
18949 	seg->flag= HA_BLOB_PART;
18950 	seg->length=0;			// Whole blob in unique constraint
18951       }
18952       else
18953       {
18954 	seg->type= keyinfo->key_part[i].type;
18955         /* Tell handler if it can do suffic space compression */
18956 	if (field->real_type() == MYSQL_TYPE_STRING &&
18957 	    keyinfo->key_part[i].length > 4)
18958 	  seg->flag|= HA_SPACE_PACK;
18959       }
18960       if (!(field->flags & NOT_NULL_FLAG))
18961       {
18962 	seg->null_bit= field->null_bit;
18963 	seg->null_pos= (uint) (field->null_ptr - (uchar*) table->record[0]);
18964 	/*
18965 	  We are using a GROUP BY on something that contains NULL
18966 	  In this case we have to tell MyISAM that two NULL should
18967 	  on INSERT be regarded at the same value
18968 	*/
18969 	if (!using_unique_constraint)
18970 	  keydef.flag|= HA_NULL_ARE_EQUAL;
18971       }
18972     }
18973   }
18974   MI_CREATE_INFO create_info;
18975   bzero((char*) &create_info,sizeof(create_info));
18976   create_info.data_file_length= table->in_use->variables.tmp_disk_table_size;
18977 
18978   if (unlikely((error= mi_create(share->path.str, share->keys, &keydef,
18979 		                 (uint) (*recinfo-start_recinfo),
18980                                  start_recinfo,
18981 		                 share->uniques, &uniquedef,
18982                                  &create_info,
18983 		                 HA_CREATE_TMP_TABLE |
18984                                  HA_CREATE_INTERNAL_TABLE |
18985                                  ((share->db_create_options &
18986                                    HA_OPTION_PACK_RECORD) ?
18987                                   HA_PACK_RECORD : 0)
18988                                  ))))
18989   {
18990     table->file->print_error(error,MYF(0));	/* purecov: inspected */
18991     table->db_stat=0;
18992     goto err;
18993   }
18994   table->in_use->inc_status_created_tmp_disk_tables();
18995   table->in_use->inc_status_created_tmp_tables();
18996   share->db_record_offset= 1;
18997   table->set_created();
18998   DBUG_RETURN(0);
18999  err:
19000   DBUG_RETURN(1);
19001 }
19002 
19003 #endif /* USE_ARIA_FOR_TMP_TABLES */
19004 
19005 
19006 /*
19007   If a HEAP table gets full, create a internal table in MyISAM or Maria
19008   and copy all rows to this
19009 */
19010 
19011 
19012 bool
create_internal_tmp_table_from_heap(THD * thd,TABLE * table,TMP_ENGINE_COLUMNDEF * start_recinfo,TMP_ENGINE_COLUMNDEF ** recinfo,int error,bool ignore_last_dupp_key_error,bool * is_duplicate)19013 create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
19014                                     TMP_ENGINE_COLUMNDEF *start_recinfo,
19015                                     TMP_ENGINE_COLUMNDEF **recinfo,
19016                                     int error,
19017                                     bool ignore_last_dupp_key_error,
19018                                     bool *is_duplicate)
19019 {
19020   TABLE new_table;
19021   TABLE_SHARE share;
19022   const char *save_proc_info;
19023   int write_err= 0;
19024   DBUG_ENTER("create_internal_tmp_table_from_heap");
19025   if (is_duplicate)
19026     *is_duplicate= FALSE;
19027 
19028   if (table->s->db_type() != heap_hton || error != HA_ERR_RECORD_FILE_FULL)
19029   {
19030     /*
19031       We don't want this error to be converted to a warning, e.g. in case of
19032       INSERT IGNORE ... SELECT.
19033     */
19034     table->file->print_error(error, MYF(ME_FATALERROR));
19035     DBUG_RETURN(1);
19036   }
19037   new_table= *table;
19038   share= *table->s;
19039   new_table.s= &share;
19040   new_table.s->db_plugin= ha_lock_engine(thd, TMP_ENGINE_HTON);
19041   if (unlikely(!(new_table.file= get_new_handler(&share, &new_table.mem_root,
19042                                                  new_table.s->db_type()))))
19043     DBUG_RETURN(1);				// End of memory
19044 
19045   if (unlikely(new_table.file->set_ha_share_ref(&share.ha_share)))
19046   {
19047     delete new_table.file;
19048     DBUG_RETURN(1);
19049   }
19050 
19051   save_proc_info=thd->proc_info;
19052   THD_STAGE_INFO(thd, stage_converting_heap_to_myisam);
19053 
19054   new_table.no_rows= table->no_rows;
19055   if (create_internal_tmp_table(&new_table, table->key_info, start_recinfo,
19056                                 recinfo,
19057                                 thd->lex->select_lex.options |
19058 			        thd->variables.option_bits))
19059     goto err2;
19060   if (open_tmp_table(&new_table))
19061     goto err1;
19062   if (table->file->indexes_are_disabled())
19063     new_table.file->ha_disable_indexes(HA_KEY_SWITCH_ALL);
19064   table->file->ha_index_or_rnd_end();
19065   if (table->file->ha_rnd_init_with_error(1))
19066     DBUG_RETURN(1);
19067   if (new_table.no_rows)
19068     new_table.file->extra(HA_EXTRA_NO_ROWS);
19069   else
19070   {
19071     /* update table->file->stats.records */
19072     table->file->info(HA_STATUS_VARIABLE);
19073     new_table.file->ha_start_bulk_insert(table->file->stats.records);
19074   }
19075 
19076   /*
19077     copy all old rows from heap table to MyISAM table
19078     This is the only code that uses record[1] to read/write but this
19079     is safe as this is a temporary MyISAM table without timestamp/autoincrement
19080     or partitioning.
19081   */
19082   while (!table->file->ha_rnd_next(new_table.record[1]))
19083   {
19084     write_err= new_table.file->ha_write_tmp_row(new_table.record[1]);
19085     DBUG_EXECUTE_IF("raise_error", write_err= HA_ERR_FOUND_DUPP_KEY ;);
19086     if (write_err)
19087       goto err;
19088     if (unlikely(thd->check_killed()))
19089       goto err_killed;
19090   }
19091   if (!new_table.no_rows && new_table.file->ha_end_bulk_insert())
19092     goto err;
19093   /* copy row that filled HEAP table */
19094   if (unlikely((write_err=new_table.file->ha_write_tmp_row(table->record[0]))))
19095   {
19096     if (new_table.file->is_fatal_error(write_err, HA_CHECK_DUP) ||
19097 	!ignore_last_dupp_key_error)
19098       goto err;
19099     if (is_duplicate)
19100       *is_duplicate= TRUE;
19101   }
19102   else
19103   {
19104     if (is_duplicate)
19105       *is_duplicate= FALSE;
19106   }
19107 
19108   /* remove heap table and change to use myisam table */
19109   (void) table->file->ha_rnd_end();
19110   (void) table->file->ha_close();          // This deletes the table !
19111   delete table->file;
19112   table->file=0;
19113   plugin_unlock(0, table->s->db_plugin);
19114   share.db_plugin= my_plugin_lock(0, share.db_plugin);
19115   new_table.s= table->s;                       // Keep old share
19116   *table= new_table;
19117   *table->s= share;
19118 
19119   table->file->change_table_ptr(table, table->s);
19120   table->use_all_columns();
19121   if (save_proc_info)
19122     thd_proc_info(thd, (!strcmp(save_proc_info,"Copying to tmp table") ?
19123                   "Copying to tmp table on disk" : save_proc_info));
19124   DBUG_RETURN(0);
19125 
19126  err:
19127   DBUG_PRINT("error",("Got error: %d",write_err));
19128   table->file->print_error(write_err, MYF(0));
19129 err_killed:
19130   (void) table->file->ha_rnd_end();
19131   (void) new_table.file->ha_close();
19132  err1:
19133   new_table.file->ha_delete_table(new_table.s->path.str);
19134  err2:
19135   delete new_table.file;
19136   thd_proc_info(thd, save_proc_info);
19137   table->mem_root= new_table.mem_root;
19138   DBUG_RETURN(1);
19139 }
19140 
19141 
19142 void
free_tmp_table(THD * thd,TABLE * entry)19143 free_tmp_table(THD *thd, TABLE *entry)
19144 {
19145   MEM_ROOT own_root= entry->mem_root;
19146   const char *save_proc_info;
19147   DBUG_ENTER("free_tmp_table");
19148   DBUG_PRINT("enter",("table: %s  alias: %s",entry->s->table_name.str,
19149                       entry->alias.c_ptr()));
19150 
19151   save_proc_info=thd->proc_info;
19152   THD_STAGE_INFO(thd, stage_removing_tmp_table);
19153 
19154   if (entry->file && entry->is_created())
19155   {
19156     entry->file->ha_index_or_rnd_end();
19157     if (entry->db_stat)
19158     {
19159       entry->file->info(HA_STATUS_VARIABLE);
19160       thd->tmp_tables_size+= (entry->file->stats.data_file_length +
19161                               entry->file->stats.index_file_length);
19162       entry->file->ha_drop_table(entry->s->path.str);
19163     }
19164     else
19165       entry->file->ha_delete_table(entry->s->path.str);
19166     delete entry->file;
19167   }
19168 
19169   /* free blobs */
19170   for (Field **ptr=entry->field ; *ptr ; ptr++)
19171     (*ptr)->free();
19172 
19173   if (entry->temp_pool_slot != MY_BIT_NONE)
19174     bitmap_lock_clear_bit(&temp_pool, entry->temp_pool_slot);
19175 
19176   plugin_unlock(0, entry->s->db_plugin);
19177   entry->alias.free();
19178 
19179   if (entry->pos_in_table_list && entry->pos_in_table_list->table)
19180   {
19181     DBUG_ASSERT(entry->pos_in_table_list->table == entry);
19182     entry->pos_in_table_list->table= NULL;
19183   }
19184 
19185   free_root(&own_root, MYF(0)); /* the table is allocated in its own root */
19186   thd_proc_info(thd, save_proc_info);
19187 
19188   DBUG_VOID_RETURN;
19189 }
19190 
19191 
19192 /**
19193   @brief
19194   Set write_func of AGGR_OP object
19195 
19196   @param join_tab JOIN_TAB of the corresponding tmp table
19197 
19198   @details
19199   Function sets up write_func according to how AGGR_OP object that
19200   is attached to the given join_tab will be used in the query.
19201 */
19202 
set_postjoin_aggr_write_func(JOIN_TAB * tab)19203 void set_postjoin_aggr_write_func(JOIN_TAB *tab)
19204 {
19205   JOIN *join= tab->join;
19206   TABLE *table= tab->table;
19207   AGGR_OP *aggr= tab->aggr;
19208   TMP_TABLE_PARAM *tmp_tbl= tab->tmp_table_param;
19209 
19210   DBUG_ASSERT(table && aggr);
19211 
19212   if (table->group && tmp_tbl->sum_func_count &&
19213       !tmp_tbl->precomputed_group_by)
19214   {
19215     /*
19216       Note for MyISAM tmp tables: if uniques is true keys won't be
19217       created.
19218     */
19219     if (table->s->keys && !table->s->uniques)
19220     {
19221       DBUG_PRINT("info",("Using end_update"));
19222       aggr->set_write_func(end_update);
19223     }
19224     else
19225     {
19226       DBUG_PRINT("info",("Using end_unique_update"));
19227       aggr->set_write_func(end_unique_update);
19228     }
19229   }
19230   else if (join->sort_and_group && !tmp_tbl->precomputed_group_by &&
19231            !join->sort_and_group_aggr_tab && join->tables_list &&
19232            join->top_join_tab_count)
19233   {
19234     DBUG_PRINT("info",("Using end_write_group"));
19235     aggr->set_write_func(end_write_group);
19236     join->sort_and_group_aggr_tab= tab;
19237   }
19238   else
19239   {
19240     DBUG_PRINT("info",("Using end_write"));
19241     aggr->set_write_func(end_write);
19242     if (tmp_tbl->precomputed_group_by)
19243     {
19244       /*
19245         A preceding call to create_tmp_table in the case when loose
19246         index scan is used guarantees that
19247         TMP_TABLE_PARAM::items_to_copy has enough space for the group
19248         by functions. It is OK here to use memcpy since we copy
19249         Item_sum pointers into an array of Item pointers.
19250       */
19251       memcpy(tmp_tbl->items_to_copy + tmp_tbl->func_count,
19252              join->sum_funcs,
19253              sizeof(Item*)*tmp_tbl->sum_func_count);
19254       tmp_tbl->items_to_copy[tmp_tbl->func_count+tmp_tbl->sum_func_count]= 0;
19255     }
19256   }
19257 }
19258 
19259 
19260 /**
19261   @details
19262   Rows produced by a join sweep may end up in a temporary table or be sent
19263   to a client. Set the function of the nested loop join algorithm which
19264   handles final fully constructed and matched records.
19265 
19266   @param join   join to setup the function for.
19267 
19268   @return
19269     end_select function to use. This function can't fail.
19270 */
19271 
setup_end_select_func(JOIN * join,JOIN_TAB * tab)19272 Next_select_func setup_end_select_func(JOIN *join, JOIN_TAB *tab)
19273 {
19274   TMP_TABLE_PARAM *tmp_tbl= tab ? tab->tmp_table_param : &join->tmp_table_param;
19275 
19276   /*
19277      Choose method for presenting result to user. Use end_send_group
19278      if the query requires grouping (has a GROUP BY clause and/or one or
19279      more aggregate functions). Use end_send if the query should not
19280      be grouped.
19281    */
19282   if (join->sort_and_group && !tmp_tbl->precomputed_group_by)
19283   {
19284     DBUG_PRINT("info",("Using end_send_group"));
19285     return end_send_group;
19286   }
19287   DBUG_PRINT("info",("Using end_send"));
19288   return end_send;
19289 }
19290 
19291 
/**
  Make a join of all tables and write it on socket or to table.

  Three execution paths are handled:
   - the query was pushed down to a storage engine (join->pushdown_query),
   - all tables are constant and no tmp table is needed,
   - the general case, driven through join->first_select.

  @param join       join to execute
  @param procedure  procedure to attach to the join (stored in
                    join->procedure); may be NULL

  @retval
    0  if ok
  @retval
    1  if error is sent
  @retval
    -1  if error should be sent
*/

static int
do_select(JOIN *join, Procedure *procedure)
{
  int rc= 0;
  enum_nested_loop_state error= NESTED_LOOP_OK;
  DBUG_ENTER("do_select");

  if (join->pushdown_query)
  {
    /* Select fields are in the temporary table */
    join->fields= &join->tmp_fields_list1;
    /* Setup HAVING to work with fields in temporary table */
    join->set_items_ref_array(join->items1);
    /* The storage engine will take care of the group by query result */
    int res= join->pushdown_query->execute(join);

    if (res)
      DBUG_RETURN(res);

    if (join->pushdown_query->store_data_in_temp_table)
    {
      /*
        The engine stored its result in a temporary table: read it back
        through the join_tab computed below and send the rows with end_send.
      */
      JOIN_TAB *last_tab= join->join_tab + join->table_count -
                          join->exec_join_tab_cnt();
      last_tab->next_select= end_send;

      enum_nested_loop_state state= last_tab->aggr->end_send();
      if (state >= NESTED_LOOP_OK)
        state= sub_select(join, last_tab, true);

      if (state < NESTED_LOOP_OK)
        res= 1;

      if (join->result->send_eof())
        res= 1;
    }
    DBUG_RETURN(res);
  }

  join->procedure= procedure;
  join->duplicate_rows= join->send_records=0;
  if (join->only_const_tables() && !join->need_tmp)
  {
    Next_select_func end_select= setup_end_select_func(join, NULL);

    /*
      HAVING will be checked after processing aggregate functions,
      but WHERE should be checked here (we have already read the tables).
      Notice that make_join_select() splits all conditions in this case
      into two groups exec_const_cond and outer_ref_cond.
      If join->table_count == join->const_tables then it is
      sufficient to check only the condition pseudo_bits_cond.
    */
    DBUG_ASSERT(join->outer_ref_cond == NULL);
    if (!join->pseudo_bits_cond || join->pseudo_bits_cond->val_int())
    {
      // HAVING will be checked by end_select
      error= (*end_select)(join, 0, 0);
      /* Second call, with the last argument set, finishes the result */
      if (error >= NESTED_LOOP_OK)
	error= (*end_select)(join, 0, 1);

      /*
        If we don't go through evaluate_join_record(), do the counting
        here.  join->send_records is increased on success in end_send(),
        so we don't touch it here.
      */
      join->join_examined_rows++;
      DBUG_ASSERT(join->join_examined_rows <= 1);
    }
    else if (join->send_row_on_empty_set())
    {
      table_map cleared_tables= (table_map) 0;
      if (end_select == end_send_group)
      {
        /*
          Was a grouping query but we did not find any rows. In this case
          we clear all tables to get null in any referenced fields,
          like in case of:
          SELECT MAX(a) AS f1, a AS f2 FROM t1 WHERE VALUE(a) IS NOT NULL
        */
        clear_tables(join, &cleared_tables);
      }
      if (!join->having || join->having->val_int())
      {
        List<Item> *columns_list= (procedure ? &join->procedure_fields_list :
                                   join->fields);
        rc= join->result->send_data(*columns_list) > 0;
      }
      /*
        We have to remove the null markings from the tables as this table
        may be part of a sub query that is re-evaluated
      */
      if (cleared_tables)
        unclear_tables(join, &cleared_tables);
    }
    /*
      An error can happen when evaluating the conds
      (the join condition and piece of where clause
      relevant to this join table).
    */
    if (unlikely(join->thd->is_error()))
      error= NESTED_LOOP_ERROR;
  }
  else
  {
    DBUG_EXECUTE_IF("show_explain_probe_do_select",
                    if (dbug_user_var_equals_int(join->thd,
                                                 "show_explain_probe_select_id",
                                                 join->select_lex->select_number))
                          dbug_serve_apcs(join->thd, 1);
                   );

    /* Start from the first non-constant table (if there are tables at all) */
    JOIN_TAB *join_tab= join->join_tab +
                        (join->tables_list ? join->const_tables : 0);
    if (join->outer_ref_cond && !join->outer_ref_cond->val_int())
      error= NESTED_LOOP_NO_MORE_ROWS;
    else
      error= join->first_select(join,join_tab,0);
    /* Second call with the last argument set signals the end of records */
    if (error >= NESTED_LOOP_OK && likely(join->thd->killed != ABORT_QUERY))
      error= join->first_select(join,join_tab,1);
  }

  join->thd->limit_found_rows= join->send_records - join->duplicate_rows;

  /* Neither "no more rows" nor ABORT_QUERY is reported as an error */
  if (error == NESTED_LOOP_NO_MORE_ROWS ||
      unlikely(join->thd->killed == ABORT_QUERY))
    error= NESTED_LOOP_OK;

  /*
    For "order by with limit", we cannot rely on send_records, but need
    to use the rowcount read originally into the join_tab applying the
    filesort. There cannot be any post-filtering conditions, nor any
    following join_tabs in this case, so this rowcount properly represents
    the correct number of qualifying rows.
  */
  if (join->order)
  {
    // Save # of found records prior to cleanup
    JOIN_TAB *sort_tab;
    JOIN_TAB *join_tab= join->join_tab;
    uint const_tables= join->const_tables;

    // Take record count from first non constant table or from last tmp table
    if (join->aggr_tables > 0)
      sort_tab= join_tab + join->top_join_tab_count + join->aggr_tables - 1;
    else
    {
      DBUG_ASSERT(!join->only_const_tables());
      sort_tab= join_tab + const_tables;
    }
    if (sort_tab->filesort &&
        join->select_options & OPTION_FOUND_ROWS &&
        sort_tab->filesort->sortorder &&
        sort_tab->filesort->limit != HA_POS_ERROR)
    {
      join->thd->limit_found_rows= sort_tab->records;
    }
  }

  {
    /*
      The following will unlock all cursors if the command wasn't an
      update command
    */
    join->join_free();			// Unlock all cursors
  }
  if (error == NESTED_LOOP_OK)
  {
    /*
      Sic: this branch works even if rc != 0, e.g. when
      send_data above returns an error.
    */
    if (unlikely(join->result->send_eof()))
      rc= 1;                                  // Don't send error
    DBUG_PRINT("info",("%ld records output", (long) join->send_records));
  }
  else
    rc= -1;
#ifndef DBUG_OFF
  if (rc)
  {
    DBUG_PRINT("error",("Error: do_select() failed"));
  }
#endif
  /* An error raised in the THD always takes precedence over rc */
  rc= join->thd->is_error() ? -1 : rc;
  DBUG_RETURN(rc);
}
19489 
19490 
rr_sequential_and_unpack(READ_RECORD * info)19491 int rr_sequential_and_unpack(READ_RECORD *info)
19492 {
19493   int error;
19494   if (unlikely((error= rr_sequential(info))))
19495     return error;
19496 
19497   for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++)
19498     (*cp->do_copy)(cp);
19499 
19500   return error;
19501 }
19502 
19503 
19504 /**
19505   @brief
19506   Instantiates temporary table
19507 
19508   @param  table           Table object that describes the table to be
19509                           instantiated
19510   @param  keyinfo         Description of the index (there is always one index)
19511   @param  start_recinfo   Column descriptions
19512   @param  recinfo INOUT   End of column descriptions
19513   @param  options         Option bits
19514 
19515   @details
19516     Creates tmp table and opens it.
19517 
19518   @return
19519      FALSE - OK
19520      TRUE  - Error
19521 */
19522 
instantiate_tmp_table(TABLE * table,KEY * keyinfo,TMP_ENGINE_COLUMNDEF * start_recinfo,TMP_ENGINE_COLUMNDEF ** recinfo,ulonglong options)19523 bool instantiate_tmp_table(TABLE *table, KEY *keyinfo,
19524                            TMP_ENGINE_COLUMNDEF *start_recinfo,
19525                            TMP_ENGINE_COLUMNDEF **recinfo,
19526                            ulonglong options)
19527 {
19528   if (table->s->db_type() == TMP_ENGINE_HTON)
19529   {
19530     /*
19531       If it is not heap (in-memory) table then convert index to unique
19532       constrain.
19533     */
19534     MEM_CHECK_DEFINED(table->record[0], table->s->reclength);
19535     if (create_internal_tmp_table(table, keyinfo, start_recinfo, recinfo,
19536                                   options))
19537       return TRUE;
19538     // Make empty record so random data is not written to disk
19539     empty_record(table);
19540     table->status= STATUS_NO_RECORD;
19541   }
19542   if (open_tmp_table(table))
19543     return TRUE;
19544 
19545   return FALSE;
19546 }
19547 
19548 
19549 /**
19550   @brief
19551   Accumulate rows of the result of an aggregation operation in a tmp table
19552 
19553   @param join  pointer to the structure providing all context info for the query
19554   @param join_tab the JOIN_TAB object to which the operation is attached
19555   @param end_records  TRUE <=> all records were accumulated, send them further
19556 
19557   @details
19558   This function accumulates records of the aggreagation operation for
19559   the node join_tab from the execution plan in a tmp table. To add a new
19560   record the function calls join_tab->aggr->put_records.
19561   When there is no more records to save, in this
19562   case the end_of_records argument == true, function tells the operation to
19563   send records further by calling aggr->send_records().
19564   When all records are sent this function passes 'end_of_records' signal
19565   further by calling sub_select() with end_of_records argument set to
19566   true. After that aggr->end_send() is called to tell the operation that
19567   it could end internal buffer scan.
19568 
19569   @note
19570   This function is not expected to be called when dynamic range scan is
19571   used to scan join_tab because  range scans aren't used for tmp tables.
19572 
19573   @return
19574     return one of enum_nested_loop_state.
19575 */
19576 
19577 enum_nested_loop_state
sub_select_postjoin_aggr(JOIN * join,JOIN_TAB * join_tab,bool end_of_records)19578 sub_select_postjoin_aggr(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
19579 {
19580   enum_nested_loop_state rc;
19581   AGGR_OP *aggr= join_tab->aggr;
19582 
19583   /* This function cannot be called if join_tab has no associated aggregation */
19584   DBUG_ASSERT(aggr != NULL);
19585 
19586   DBUG_ENTER("sub_select_aggr_tab");
19587 
19588   if (join->thd->killed)
19589   {
19590     /* The user has aborted the execution of the query */
19591     join->thd->send_kill_message();
19592     DBUG_RETURN(NESTED_LOOP_KILLED);
19593   }
19594 
19595   if (end_of_records)
19596   {
19597     rc= aggr->end_send();
19598     if (rc >= NESTED_LOOP_OK)
19599       rc= sub_select(join, join_tab, end_of_records);
19600     DBUG_RETURN(rc);
19601   }
19602 
19603   rc= aggr->put_record();
19604 
19605   DBUG_RETURN(rc);
19606 }
19607 
19608 
19609 /*
19610   Fill the join buffer with partial records, retrieve all full matches for
19611   them
19612 
19613   SYNOPSIS
19614     sub_select_cache()
19615       join         pointer to the structure providing all context info for the
19616                    query
19617       join_tab     the first next table of the execution plan to be retrieved
19618       end_records  true when we need to perform final steps of the retrieval
19619 
19620   DESCRIPTION
19621     For a given table Ti= join_tab from the sequence of tables of the chosen
19622     execution plan T1,...,Ti,...,Tn the function just put the partial record
19623     t1,...,t[i-1] into the join buffer associated with table Ti unless this
19624     is the last record added into the buffer. In this case,  the function
19625     additionally finds all matching full records for all partial
19626     records accumulated in the buffer, after which it cleans the buffer up.
19627     If a partial join record t1,...,ti is extended utilizing a dynamic
19628     range scan then it is not put into the join buffer. Rather all matching
19629     records are found for it at once by the function sub_select.
19630 
19631   NOTES
19632     The function implements the algorithmic schema for both Blocked Nested
19633     Loop Join and Batched Key Access Join. The difference can be seen only at
19634     the level of of the implementation of the put_record and join_records
19635     virtual methods for the cache object associated with the join_tab.
19636     The put_record method accumulates records in the cache, while the
19637     join_records method builds all matching join records and send them into
19638     the output stream.
19639 
19640   RETURN
19641     return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS.
19642 */
19643 
19644 enum_nested_loop_state
sub_select_cache(JOIN * join,JOIN_TAB * join_tab,bool end_of_records)19645 sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
19646 {
19647   enum_nested_loop_state rc;
19648   JOIN_CACHE *cache= join_tab->cache;
19649   DBUG_ENTER("sub_select_cache");
19650 
19651   /*
19652     This function cannot be called if join_tab has no associated join
19653     buffer
19654   */
19655   DBUG_ASSERT(cache != NULL);
19656 
19657   join_tab->cache->reset_join(join);
19658 
19659   if (end_of_records)
19660   {
19661     rc= cache->join_records(FALSE);
19662     if (rc == NESTED_LOOP_OK || rc == NESTED_LOOP_NO_MORE_ROWS ||
19663         rc == NESTED_LOOP_QUERY_LIMIT)
19664       rc= sub_select(join, join_tab, end_of_records);
19665     DBUG_RETURN(rc);
19666   }
19667   if (unlikely(join->thd->check_killed()))
19668   {
19669     /* The user has aborted the execution of the query */
19670     DBUG_RETURN(NESTED_LOOP_KILLED);
19671   }
19672   if (!test_if_use_dynamic_range_scan(join_tab))
19673   {
19674     if (!cache->put_record())
19675       DBUG_RETURN(NESTED_LOOP_OK);
19676     /*
19677       We has decided that after the record we've just put into the buffer
19678       won't add any more records. Now try to find all the matching
19679       extensions for all records in the buffer.
19680     */
19681     rc= cache->join_records(FALSE);
19682     DBUG_RETURN(rc);
19683   }
19684   /*
19685      TODO: Check whether we really need the call below and we can't do
19686            without it. If it's not the case remove it.
19687   */
19688   rc= cache->join_records(TRUE);
19689   if (rc == NESTED_LOOP_OK || rc == NESTED_LOOP_NO_MORE_ROWS ||
19690       rc == NESTED_LOOP_QUERY_LIMIT)
19691     rc= sub_select(join, join_tab, end_of_records);
19692   DBUG_RETURN(rc);
19693 }
19694 
19695 /**
  Retrieve all records with a given beginning from the result of a join.
19697 
19698     For a given partial join record consisting of records from the tables
19699     preceding the table join_tab in the execution plan, the function
19700     retrieves all matching full records from the result set and
19701     send them to the result set stream.
19702 
19703   @note
19704     The function effectively implements the  final (n-k) nested loops
19705     of nested loops join algorithm, where k is the ordinal number of
19706     the join_tab table and n is the total number of tables in the join query.
19707     It performs nested loops joins with all conjunctive predicates from
19708     the where condition pushed as low to the tables as possible.
19709     E.g. for the query
19710     @code
19711       SELECT * FROM t1,t2,t3
19712       WHERE t1.a=t2.a AND t2.b=t3.b AND t1.a BETWEEN 5 AND 9
19713     @endcode
19714     the predicate (t1.a BETWEEN 5 AND 9) will be pushed to table t1,
19715     given the selected plan prescribes to nest retrievals of the
19716     joined tables in the following order: t1,t2,t3.
19717     A pushed down predicate are attached to the table which it pushed to,
19718     at the field join_tab->select_cond.
19719     When executing a nested loop of level k the function runs through
19720     the rows of 'join_tab' and for each row checks the pushed condition
19721     attached to the table.
19722     If it is false the function moves to the next row of the
19723     table. If the condition is true the function recursively executes (n-k-1)
19724     remaining embedded nested loops.
19725     The situation becomes more complicated if outer joins are involved in
19726     the execution plan. In this case the pushed down predicates can be
19727     checked only at certain conditions.
19728     Suppose for the query
19729     @code
19730       SELECT * FROM t1 LEFT JOIN (t2,t3) ON t3.a=t1.a
19731       WHERE t1>2 AND (t2.b>5 OR t2.b IS NULL)
19732     @endcode
19733     the optimizer has chosen a plan with the table order t1,t2,t3.
19734     The predicate P1=t1>2 will be pushed down to the table t1, while the
19735     predicate P2=(t2.b>5 OR t2.b IS NULL) will be attached to the table
19736     t2. But the second predicate can not be unconditionally tested right
19737     after a row from t2 has been read. This can be done only after the
19738     first row with t3.a=t1.a has been encountered.
19739     Thus, the second predicate P2 is supplied with a guarded value that are
19740     stored in the field 'found' of the first inner table for the outer join
19741     (table t2). When the first row with t3.a=t1.a for the  current row
19742     of table t1  appears, the value becomes true. For now on the predicate
19743     is evaluated immediately after the row of table t2 has been read.
19744     When the first row with t3.a=t1.a has been encountered all
19745     conditions attached to the inner tables t2,t3 must be evaluated.
19746     Only when all of them are true the row is sent to the output stream.
19747     If not, the function returns to the lowest nest level that has a false
19748     attached condition.
    The predicates from on expressions are also pushed down. If in the
    above example the on expression were (t3.a=t1.a AND t2.a=t1.a),
19751     then t1.a=t2.a would be pushed down to table t2, and without any
19752     guard.
19753     If after the run through all rows of table t2, the first inner table
19754     for the outer join operation, it turns out that no matches are
19755     found for the current row of t1, then current row from table t1
19756     is complemented by nulls  for t2 and t3. Then the pushed down predicates
19757     are checked for the composed row almost in the same way as it had
19758     been done for the first row with a match. The only difference is
19759     the predicates from on expressions are not checked.
19760 
19761   @par
19762   @b IMPLEMENTATION
19763   @par
    The function forms output rows for a current partial join of k
    tables recursively.
19766     For each partial join record ending with a certain row from
19767     join_tab it calls sub_select that builds all possible matching
19768     tails from the result set.
19769     To be able  check predicates conditionally items of the class
19770     Item_func_trig_cond are employed.
19771     An object of  this class is constructed from an item of class COND
19772     and a pointer to a guarding boolean variable.
19773     When the value of the guard variable is true the value of the object
19774     is the same as the value of the predicate, otherwise it's just returns
19775     true.
19776     To carry out a return to a nested loop level of join table t the pointer
19777     to t is remembered in the field 'return_rtab' of the join structure.
19778     Consider the following query:
19779     @code
        SELECT * FROM t1
                      LEFT JOIN
19782                       (t2, t3 LEFT JOIN (t4,t5) ON t5.a=t3.a)
19783                       ON t4.a=t2.a
19784            WHERE (t2.b=5 OR t2.b IS NULL) AND (t4.b=2 OR t4.b IS NULL)
19785     @endcode
19786     Suppose the chosen execution plan dictates the order t1,t2,t3,t4,t5
19787     and suppose for a given joined rows from tables t1,t2,t3 there are
19788     no rows in the result set yet.
19789     When first row from t5 that satisfies the on condition
19790     t5.a=t3.a is found, the pushed down predicate t4.b=2 OR t4.b IS NULL
19791     becomes 'activated', as well the predicate t4.a=t2.a. But
19792     the predicate (t2.b=5 OR t2.b IS NULL) can not be checked until
19793     t4.a=t2.a becomes true.
    In order not to re-evaluate the predicates that were already evaluated
    as attached pushed down predicates, a pointer to the first
    most inner unmatched table is maintained in join_tab->first_unmatched.
19797     Thus, when the first row from t5 with t5.a=t3.a is found
19798     this pointer for t5 is changed from t4 to t2.
19799 
19800     @par
19801     @b STRUCTURE @b NOTES
19802     @par
19803     join_tab->first_unmatched points always backwards to the first inner
19804     table of the embedding nested join, if any.
19805 
19806   @param join      pointer to the structure providing all context info for
19807                    the query
19808   @param join_tab  the first next table of the execution plan to be retrieved
  @param end_of_records  true when we need to perform final steps of retrieval
19810 
19811   @return
19812     return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS.
19813 */
19814 
enum_nested_loop_state
sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
{
  DBUG_ENTER("sub_select");

  /*
    Clear the null_row flag of this table or, when last_inner is set, of
    every table in the range [join_tab, join_tab->last_inner].
  */
  if (join_tab->last_inner)
  {
    JOIN_TAB *last_inner_tab= join_tab->last_inner;
    for (JOIN_TAB  *jt= join_tab; jt <= last_inner_tab; jt++)
      jt->table->null_row= 0;
  }
  else
    join_tab->table->null_row=0;

  /*
    With end_of_records set no row is read from this table: the call is
    only forwarded to the next level and its result returned unchanged.
  */
  if (end_of_records)
  {
    enum_nested_loop_state nls=
      (*join_tab->next_select)(join,join_tab+1,end_of_records);
    DBUG_RETURN(nls);
  }
  join_tab->tracker->r_scans++;

  int error;
  enum_nested_loop_state rc= NESTED_LOOP_OK;
  READ_RECORD *info= &join_tab->read_record;


  /*
    Call sj_weedout_delete_rows() on every temporary table in the chain
    that starts at join_tab->flush_weedout_table.
  */
  for (SJ_TMP_TABLE *flush_dups_table= join_tab->flush_weedout_table;
       flush_dups_table;
       flush_dups_table= flush_dups_table->next_flush_table)
  {
    flush_dups_table->sj_weedout_delete_rows();
  }

  /* Run preread_init() unless preread_init_done is already set. */
  if (!join_tab->preread_init_done && join_tab->preread_init())
    DBUG_RETURN(NESTED_LOOP_ERROR);

  /*
    By default processing continues at this table. evaluate_join_record()
    may move join->return_tab to an earlier table; the read loop below
    stops as soon as return_tab < join_tab.
  */
  join->return_tab= join_tab;

  if (join_tab->last_inner)
  {
    /* join_tab is the first inner table for an outer join operation. */

    /* Set initial state of guard variables for this table.*/
    join_tab->found=0;
    join_tab->not_null_compl= 1;

    /* Set first_unmatched for the last inner table of this group */
    join_tab->last_inner->first_unmatched= join_tab;
    /*
      If on_precond evaluates to false no row is read at all: rc is preset
      to NESTED_LOOP_NO_MORE_ROWS, which skips the startup call and both
      reads below and, as 'found' is still 0, leads straight to the
      NULL-complemented row.
    */
    if (join_tab->on_precond && !join_tab->on_precond->val_int())
      rc= NESTED_LOOP_NO_MORE_ROWS;
  }
  join->thd->get_stmt_da()->reset_current_row_for_warning();

  /* A negative result of the startup call is returned to the caller as is. */
  if (rc != NESTED_LOOP_NO_MORE_ROWS &&
      (rc= join_tab_execution_startup(join_tab)) < 0)
    DBUG_RETURN(rc);

  if (join_tab->loosescan_match_tab)
    join_tab->loosescan_match_tab->found_match= FALSE;

  /* Read and process the first row; the loop below handles the rest. */
  if (rc != NESTED_LOOP_NO_MORE_ROWS)
  {
    error= (*join_tab->read_first_record)(join_tab);
    /* Only a successfully read row has a position to remember. */
    if (!error && join_tab->keep_current_rowid)
      join_tab->table->file->position(join_tab->table->record[0]);
    rc= evaluate_join_record(join, join_tab, error);
  }

  /*
    Note: psergey has added the 2nd part of the following condition; the
    change should probably be made in 5.1, too.
  */
  bool skip_over= FALSE;
  while (rc == NESTED_LOOP_OK && join->return_tab >= join_tab)
  {
    /*
      The previous row produced a match for the LooseScan range: save its
      key value in loosescan_buf so that following rows can be compared
      against it.
    */
    if (join_tab->loosescan_match_tab &&
        join_tab->loosescan_match_tab->found_match)
    {
      KEY *key= join_tab->table->key_info + join_tab->loosescan_key;
      key_copy(join_tab->loosescan_buf, join_tab->table->record[0], key,
               join_tab->loosescan_key_len);
      skip_over= TRUE;
    }

    error= info->read_record();

    if (skip_over && likely(!error))
    {
      if (!key_cmp(join_tab->table->key_info[join_tab->loosescan_key].key_part,
                   join_tab->loosescan_buf, join_tab->loosescan_key_len))
      {
        /*
          This is the LooseScan action: skip over records with the same key
          value if we already had a match for them.
        */
        continue;
      }
      /* A row with a different key value: leave the skipping mode. */
      join_tab->loosescan_match_tab->found_match= FALSE;
      skip_over= FALSE;
    }

    if (join_tab->keep_current_rowid && likely(!error))
      join_tab->table->file->position(join_tab->table->record[0]);

    rc= evaluate_join_record(join, join_tab, error);
  }

  /*
    All rows of the first inner table of an outer join were read and none
    of them set 'found': process the NULL-complemented row instead.
  */
  if (rc == NESTED_LOOP_NO_MORE_ROWS &&
      join_tab->last_inner && !join_tab->found)
    rc= evaluate_null_complemented_join_record(join, join_tab);

  /*
    NESTED_LOOP_NO_MORE_ROWS only ends the scan at this level; it is
    reported to the caller as NESTED_LOOP_OK.
  */
  if (rc == NESTED_LOOP_NO_MORE_ROWS)
    rc= NESTED_LOOP_OK;
  DBUG_RETURN(rc);
}
19931 
19932 /**
19933   @brief Process one row of the nested loop join.
19934 
19935   This function will evaluate parts of WHERE/ON clauses that are
19936   applicable to the partial row on hand and in case of success
19937   submit this row to the next level of the nested loop.
19938 
19939   @param  join     - The join object
19940   @param  join_tab - The most inner join_tab being processed
19941   @param  error > 0: Error, terminate processing
19942                 = 0: (Partial) row is available
19943                 < 0: No more rows available at this level
19944   @return Nested loop state (Ok, No_more_rows, Error, Killed)
19945 */
19946 
static enum_nested_loop_state
evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
                     int error)
{
  bool shortcut_for_distinct= join_tab->shortcut_for_distinct;
  /*
    Value of join->found_records before the row is passed on; compared
    with the value after next_select() for the DISTINCT shortcut below.
  */
  ha_rows found_records=join->found_records;
  COND *select_cond= join_tab->select_cond;
  bool select_cond_result= TRUE;

  DBUG_ENTER("evaluate_join_record");
  DBUG_PRINT("enter",
             ("evaluate_join_record join: %p join_tab: %p"
              " cond: %p error: %d  alias %s",
              join, join_tab, select_cond, error,
              join_tab->table->alias.ptr()));

  /*
    Translate the result of the read call. An error already recorded in
    the THD is treated as fatal even if the read itself returned <= 0.
  */
  if (error > 0 || unlikely(join->thd->is_error())) // Fatal error
    DBUG_RETURN(NESTED_LOOP_ERROR);
  if (error < 0)
    DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
  if (unlikely(join->thd->check_killed()))       // Aborted by user
  {
    DBUG_RETURN(NESTED_LOOP_KILLED);            /* purecov: inspected */
  }

  join_tab->tracker->r_rows++;

  if (select_cond)
  {
    select_cond_result= MY_TEST(select_cond->val_int());

    /* check for errors evaluating the condition */
    if (unlikely(join->thd->is_error()))
      DBUG_RETURN(NESTED_LOOP_ERROR);
  }

  if (!select_cond || select_cond_result)
  {
    /*
      There is no select condition or the attached pushed down
      condition is true => a match is found.
    */
    join_tab->tracker->r_rows_after_where++;

    /*
      'found' stays true unless a newly activated predicate attached to
      join_tab itself is false, or the weedout check below reports a
      duplicate.
    */
    bool found= 1;
    while (join_tab->first_unmatched && found)
    {
      /*
        The while condition is always false if join_tab is not
        the last inner join table of an outer join operation.
      */
      JOIN_TAB *first_unmatched= join_tab->first_unmatched;
      /*
        Mark that a match for current outer table is found.
        This activates push down conditional predicates attached
        to the all inner tables of the outer join.
      */
      first_unmatched->found= 1;
      for (JOIN_TAB *tab= first_unmatched; tab <= join_tab; tab++)
      {
        /*
          Check whether 'not exists' optimization can be used here.
          If  tab->table->reginfo.not_exists_optimize is set to true
          then WHERE contains a conjunctive predicate IS NULL over
          a non-nullable field of tab. When activated this predicate
          will filter out all records with matches for the left part
          of the outer join whose inner tables start from the
          first_unmatched table and include table tab. To safely use
          'not exists' optimization we have to check that the
          IS NULL predicate is really activated, i.e. all guards
          that wrap it are in the 'open' state.
	*/
	bool not_exists_opt_is_applicable=
               tab->table->reginfo.not_exists_optimize;
	for (JOIN_TAB *first_upper= first_unmatched->first_upper;
             not_exists_opt_is_applicable && first_upper;
             first_upper= first_upper->first_upper)
        {
          if (!first_upper->found)
            not_exists_opt_is_applicable= false;
        }
        /* Check all predicates that has just been activated. */
        /*
          Actually all predicates non-guarded by first_unmatched->found
          will be re-evaluated again. It could be fixed, but, probably,
          it's not worth doing now.
        */
        if (tab->select_cond)
        {
          const longlong res= tab->select_cond->val_int();
          if (join->thd->is_error())
            DBUG_RETURN(NESTED_LOOP_ERROR);

          if (!res)
          {
            /* The condition attached to table tab is false */
            if (tab == join_tab)
            {
              /*
                The current row of join_tab is rejected; with 'not exists'
                applicable the scan of join_tab is ended as well.
              */
              found= 0;
              if (not_exists_opt_is_applicable)
                DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
            }
            else
            {
              /*
                Set a return point if rejected predicate is attached
                not to the last table of the current nest level.
              */
              join->return_tab= tab;
              if (not_exists_opt_is_applicable)
                DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
              else
                DBUG_RETURN(NESTED_LOOP_OK);
            }
          }
        }
      }
      /*
        Check whether join_tab is not the last inner table
        for another embedding outer join.
      */
      if ((first_unmatched= first_unmatched->first_upper) &&
          first_unmatched->last_inner != join_tab)
        first_unmatched= 0;
      join_tab->first_unmatched= first_unmatched;
    }

    /*
      Remember the return point that is current now: next_select() below
      may change join->return_tab, and the smaller of the two is kept.
    */
    JOIN_TAB *return_tab= join->return_tab;
    join_tab->found_match= TRUE;

    if (join_tab->check_weed_out_table && found)
    {
      /* -1 is an error; 1 makes the row be dropped (found is reset). */
      int res= join_tab->check_weed_out_table->sj_weedout_check_row(join->thd);
      DBUG_PRINT("info", ("weedout_check: %d", res));
      if (res == -1)
        DBUG_RETURN(NESTED_LOOP_ERROR);
      else if (res == 1)
        found= FALSE;
    }
    else if (join_tab->do_firstmatch)
    {
      /*
        We should return to the join_tab->do_firstmatch after we have
        enumerated all the suffixes for current prefix row combination
      */
      return_tab= join_tab->do_firstmatch;
    }

    /*
      It was not just a return to lower loop level when one
      of the newly activated predicates is evaluated as false
      (See above join->return_tab= tab).
    */
    join->join_examined_rows++;
    DBUG_PRINT("counts", ("join->examined_rows++: %lu  found: %d",
                          (ulong) join->join_examined_rows, (int) found));

    if (found)
    {
      enum enum_nested_loop_state rc;
      /* A match from join_tab is found for the current partial join. */
      rc= (*join_tab->next_select)(join, join_tab+1, 0);
      join->thd->get_stmt_da()->inc_current_row_for_warning();
      /* Anything but OK / NO_MORE_ROWS is passed up unchanged. */
      if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
        DBUG_RETURN(rc);
      if (return_tab < join->return_tab)
        join->return_tab= return_tab;

      /* check for errors evaluating the condition */
      if (unlikely(join->thd->is_error()))
        DBUG_RETURN(NESTED_LOOP_ERROR);

      /* The return point is an earlier table: leave this level. */
      if (join->return_tab < join_tab)
        DBUG_RETURN(NESTED_LOOP_OK);
      /*
        Test if this was a SELECT DISTINCT query on a table that
        was not in the field list;  In this case we can abort if
        we found a row, as no new rows can be added to the result.
      */
      if (shortcut_for_distinct && found_records != join->found_records)
        DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);

      DBUG_RETURN(NESTED_LOOP_OK);
    }
  }
  else
  {
    /*
      The condition pushed down to the table join_tab rejects all rows
      with the beginning coinciding with the current partial join.
    */
    join->join_examined_rows++;
  }

  /*
    Only rejected rows get here (every path with found == true returned
    above): advance the warning row counter and call unlock_row() for the
    row just read.
  */
  join->thd->get_stmt_da()->inc_current_row_for_warning();
  join_tab->read_record.unlock_row(join_tab);

  DBUG_RETURN(NESTED_LOOP_OK);
}
20146 
20147 /**
20148 
20149   @details
    Construct a NULL-complemented partial join record and feed it to the next
    level of the nested loop. This function is used when we have
    an OUTER join and no matching record was found.
20153 */
20154 
20155 static enum_nested_loop_state
evaluate_null_complemented_join_record(JOIN * join,JOIN_TAB * join_tab)20156 evaluate_null_complemented_join_record(JOIN *join, JOIN_TAB *join_tab)
20157 {
20158   /*
20159     The table join_tab is the first inner table of a outer join operation
20160     and no matches has been found for the current outer row.
20161   */
20162   JOIN_TAB *last_inner_tab= join_tab->last_inner;
20163   /* Cache variables for faster loop */
20164   COND *select_cond;
20165   for ( ; join_tab <= last_inner_tab ; join_tab++)
20166   {
20167     /* Change the the values of guard predicate variables. */
20168     join_tab->found= 1;
20169     join_tab->not_null_compl= 0;
20170     /* The outer row is complemented by nulls for each inner tables */
20171     restore_record(join_tab->table,s->default_values);  // Make empty record
20172     mark_as_null_row(join_tab->table);       // For group by without error
20173     select_cond= join_tab->select_cond;
20174     /* Check all attached conditions for inner table rows. */
20175     if (select_cond && !select_cond->val_int())
20176       return NESTED_LOOP_OK;
20177   }
20178   join_tab--;
20179   /*
20180     The row complemented by nulls might be the first row
20181     of embedding outer joins.
20182     If so, perform the same actions as in the code
20183     for the first regular outer join row above.
20184   */
20185   for ( ; ; )
20186   {
20187     JOIN_TAB *first_unmatched= join_tab->first_unmatched;
20188     if ((first_unmatched= first_unmatched->first_upper) &&
20189         first_unmatched->last_inner != join_tab)
20190       first_unmatched= 0;
20191     join_tab->first_unmatched= first_unmatched;
20192     if (!first_unmatched)
20193       break;
20194     first_unmatched->found= 1;
20195     for (JOIN_TAB *tab= first_unmatched; tab <= join_tab; tab++)
20196     {
20197       if (tab->select_cond && !tab->select_cond->val_int())
20198       {
20199         join->return_tab= tab;
20200         return NESTED_LOOP_OK;
20201       }
20202     }
20203   }
20204   /*
20205     The row complemented by nulls satisfies all conditions
20206     attached to inner tables.
20207   */
20208   if (join_tab->check_weed_out_table)
20209   {
20210     int res= join_tab->check_weed_out_table->sj_weedout_check_row(join->thd);
20211     if (res == -1)
20212       return NESTED_LOOP_ERROR;
20213     else if (res == 1)
20214       return NESTED_LOOP_OK;
20215   }
20216   else if (join_tab->do_firstmatch)
20217   {
20218     /*
20219       We should return to the join_tab->do_firstmatch after we have
20220       enumerated all the suffixes for current prefix row combination
20221     */
20222     if (join_tab->do_firstmatch < join->return_tab)
20223       join->return_tab= join_tab->do_firstmatch;
20224   }
20225 
20226   /*
20227     Send the row complemented by nulls to be joined with the
20228     remaining tables.
20229   */
20230   return (*join_tab->next_select)(join, join_tab+1, 0);
20231 }
20232 
20233 /*****************************************************************************
20234   The different ways to read a record
20235   Returns -1 if row was not found, 0 if row was found and 1 on errors
20236 *****************************************************************************/
20237 
20238 /** Help function when we get some an error from the table handler. */
20239 
report_error(TABLE * table,int error)20240 int report_error(TABLE *table, int error)
20241 {
20242   if (error == HA_ERR_END_OF_FILE || error == HA_ERR_KEY_NOT_FOUND)
20243   {
20244     table->status= STATUS_GARBAGE;
20245     return -1;					// key not found; ok
20246   }
20247   /*
20248     Locking reads can legally return also these errors, do not
20249     print them to the .err log
20250   */
20251   if (error != HA_ERR_LOCK_DEADLOCK && error != HA_ERR_LOCK_WAIT_TIMEOUT
20252       && error != HA_ERR_TABLE_DEF_CHANGED && !table->in_use->killed)
20253     sql_print_error("Got error %d when reading table '%s'",
20254 		    error, table->s->path.str);
20255   table->file->print_error(error,MYF(0));
20256   return 1;
20257 }
20258 
20259 
safe_index_read(JOIN_TAB * tab)20260 int safe_index_read(JOIN_TAB *tab)
20261 {
20262   int error;
20263   TABLE *table= tab->table;
20264   if (unlikely((error=
20265                 table->file->ha_index_read_map(table->record[0],
20266                                                tab->ref.key_buff,
20267                                                make_prev_keypart_map(tab->ref.key_parts),
20268                                                HA_READ_KEY_EXACT))))
20269     return report_error(table, error);
20270   return 0;
20271 }
20272 
20273 
20274 /**
20275   Reads content of constant table
20276 
20277   @param tab  table
20278   @param pos  position of table in query plan
20279 
20280   @retval 0   ok, one row was found or one NULL-complemented row was created
20281   @retval -1  ok, no row was found and no NULL-complemented row was created
20282   @retval 1   error
20283 */
20284 
static int
join_read_const_table(THD *thd, JOIN_TAB *tab, POSITION *pos)
{
  int error;
  TABLE_LIST *tbl;
  DBUG_ENTER("join_read_const_table");
  TABLE *table=tab->table;
  /* Mark the table as constant and reset its row state before reading. */
  table->const_table=1;
  table->null_row=0;
  table->status=STATUS_NO_RECORD;

  if (tab->table->pos_in_table_list->is_materialized_derived() &&
      !tab->table->pos_in_table_list->fill_me)
  {
    //TODO: don't get here at all
    /* Skip materialized derived tables/views. */
    DBUG_RETURN(0);
  }
  else if (tab->table->pos_in_table_list->jtbm_subselect &&
          tab->table->pos_in_table_list->jtbm_subselect->is_jtbm_const_tab)
  {
    /*
      Nothing is read here for a constant jtbm table: the result only
      depends on jtbm_const_row_found (0 if set, -1 otherwise).
    */
    int res;
    if (tab->table->pos_in_table_list->jtbm_subselect->jtbm_const_row_found)
      res= 0;
    else
      res= -1;
    DBUG_RETURN(res);
  }
  else if (tab->type == JT_SYSTEM)
  {
    if (unlikely((error=join_read_system(tab))))
    {						// Info for DESCRIBE
      tab->info= ET_CONST_ROW_NOT_FOUND;
      /* Mark for EXPLAIN that the row was not found */
      pos->records_read=0.0;
      pos->ref_depend_map= 0;
      /*
        Real errors (error > 0) are always returned. A missing row
        (error < 0) is returned only if the table is not the inner side
        of an outer join; otherwise execution continues below.
      */
      if (!table->pos_in_table_list->outer_join || error > 0)
	DBUG_RETURN(error);
    }
    /*
      The optimizer trust the engine that when stats.records is 0, there
      was no found rows
    */
    DBUG_ASSERT(table->file->stats.records > 0 || error);
  }
  else
  {
    /*
      Switch the handler to key-only reads when the ref key is one of the
      covering keys, keyread is not disabled for the table and the lock
      type is not above TL_READ_HIGH_PRIORITY.
    */
    if (/*!table->file->key_read && */
        table->covering_keys.is_set(tab->ref.key) && !table->no_keyread &&
        (int) table->reginfo.lock_type <= (int) TL_READ_HIGH_PRIORITY)
    {
      table->file->ha_start_keyread(tab->ref.key);
      tab->index= tab->ref.key;
    }
    error=join_read_const(tab);
    /* Called unconditionally, whether or not keyread was started above. */
    table->file->ha_end_keyread();
    if (unlikely(error))
    {
      tab->info= ET_UNIQUE_ROW_NOT_FOUND;
      /* Mark for EXPLAIN that the row was not found */
      pos->records_read=0.0;
      pos->ref_depend_map= 0;
      /* Same rule as in the JT_SYSTEM branch above. */
      if (!table->pos_in_table_list->outer_join || error > 0)
	DBUG_RETURN(error);
    }
  }
  /*
     Evaluate an on-expression only if it is not considered expensive.
     This mainly prevents executing subqueries in optimization phase.
     This is necessary since proper setup for such execution has not been
     done at this stage.
  */
  if (*tab->on_expr_ref && !table->null_row &&
      !(*tab->on_expr_ref)->is_expensive())
  {
#if !defined(DBUG_OFF) && defined(NOT_USING_ITEM_EQUAL)
    /*
      This test could be very useful to find bugs in the optimizer
      where we would call this function with an expression that can't be
      evaluated yet. We can't have this enabled by default as long as
      have items like Item_equal, that doesn't report they are const but
      they can still be called even if they contain not const items.
    */
    (*tab->on_expr_ref)->update_used_tables();
    DBUG_ASSERT((*tab->on_expr_ref)->const_item());
#endif
    /*
      An on-expression that evaluates to 0 rejects the row that was read:
      the table row is then marked as a NULL row.
    */
    if ((table->null_row= MY_TEST((*tab->on_expr_ref)->val_int() == 0)))
      mark_as_null_row(table);
  }
  /* Clear maybe_null unless the row is a NULL row or the join uses
     mixed implicit grouping. */
  if (!table->null_row && ! tab->join->mixed_implicit_grouping)
    table->maybe_null= 0;

  {
    JOIN *join= tab->join;
    List_iterator<TABLE_LIST> ti(join->select_lex->leaf_tables);
    /* Check appearance of new constant items in Item_equal objects */
    if (join->conds)
      update_const_equal_items(thd, join->conds, tab, TRUE);
    /*
      Do the same for the on-expressions: for every leaf table visit the
      table itself and then its embedding nests, going up only while the
      element just visited is the head of its embedding nest's join_list.
    */
    while ((tbl= ti++))
    {
      TABLE_LIST *embedded;
      TABLE_LIST *embedding= tbl;
      do
      {
        embedded= embedding;
        if (embedded->on_expr)
           update_const_equal_items(thd, embedded->on_expr, tab, TRUE);
        embedding= embedded->embedding;
      }
      while (embedding &&
             embedding->nested_join->join_list.head() == embedded);
    }
  }
  DBUG_RETURN(0);
}
20401 
20402 
20403 /**
20404   Read a constant table when there is at most one matching row, using a table
20405   scan.
20406 
20407   @param tab			Table to read
20408 
20409   @retval  0  Row was found
20410   @retval  -1 Row was not found
20411   @retval  1  Got an error (other than row not found) during read
20412 */
20413 static int
join_read_system(JOIN_TAB * tab)20414 join_read_system(JOIN_TAB *tab)
20415 {
20416   TABLE *table= tab->table;
20417   int error;
20418   if (table->status & STATUS_GARBAGE)		// If first read
20419   {
20420     if (unlikely((error=
20421                   table->file->ha_read_first_row(table->record[0],
20422                                                  table->s->primary_key))))
20423     {
20424       if (error != HA_ERR_END_OF_FILE)
20425 	return report_error(table, error);
20426       table->const_table= 1;
20427       mark_as_null_row(tab->table);
20428       empty_record(table);			// Make empty record
20429       return -1;
20430     }
20431     store_record(table,record[1]);
20432   }
20433   else if (!table->status)			// Only happens with left join
20434     restore_record(table,record[1]);			// restore old record
20435   table->null_row=0;
20436   return table->status ? -1 : 0;
20437 }
20438 
20439 
20440 /**
20441   Read a table when there is at most one matching row.
20442 
20443   @param tab			Table to read
20444 
20445   @retval  0  Row was found
20446   @retval  -1 Row was not found
20447   @retval  1  Got an error (other than row not found) during read
20448 */
20449 
20450 static int
join_read_const(JOIN_TAB * tab)20451 join_read_const(JOIN_TAB *tab)
20452 {
20453   int error;
20454   TABLE *table= tab->table;
20455   if (table->status & STATUS_GARBAGE)		// If first read
20456   {
20457     table->status= 0;
20458     if (cp_buffer_from_ref(tab->join->thd, table, &tab->ref))
20459       error=HA_ERR_KEY_NOT_FOUND;
20460     else
20461     {
20462       error= table->file->ha_index_read_idx_map(table->record[0],tab->ref.key,
20463                                                 (uchar*) tab->ref.key_buff,
20464                                                 make_prev_keypart_map(tab->ref.key_parts),
20465                                                 HA_READ_KEY_EXACT);
20466     }
20467     if (unlikely(error))
20468     {
20469       table->status= STATUS_NOT_FOUND;
20470       mark_as_null_row(tab->table);
20471       empty_record(table);
20472       if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
20473 	return report_error(table, error);
20474       return -1;
20475     }
20476     store_record(table,record[1]);
20477   }
20478   else if (!(table->status & ~STATUS_NULL_ROW))	// Only happens with left join
20479   {
20480     table->status=0;
20481     restore_record(table,record[1]);			// restore old record
20482   }
20483   table->null_row=0;
20484   return table->status ? -1 : 0;
20485 }
20486 
20487 /*
20488   eq_ref access method implementation: "read_first" function
20489 
20490   SYNOPSIS
20491     join_read_key()
20492       tab  JOIN_TAB of the accessed table
20493 
20494   DESCRIPTION
    This is the "read_first" function for the eq_ref access method. The
    difference from the ref access function is that it has a one-element
    lookup cache (see cmp_buffer_with_ref)
20498 
20499   RETURN
20500     0  - Ok
20501    -1  - Row not found
20502     1  - Error
20503 */
20504 
20505 
20506 static int
join_read_key(JOIN_TAB * tab)20507 join_read_key(JOIN_TAB *tab)
20508 {
20509   return join_read_key2(tab->join->thd, tab, tab->table, &tab->ref);
20510 }
20511 
20512 
20513 /*
20514   eq_ref access handler but generalized a bit to support TABLE and TABLE_REF
20515   not from the join_tab. See join_read_key for detailed synopsis.
20516 */
join_read_key2(THD * thd,JOIN_TAB * tab,TABLE * table,TABLE_REF * table_ref)20517 int join_read_key2(THD *thd, JOIN_TAB *tab, TABLE *table, TABLE_REF *table_ref)
20518 {
20519   int error;
20520   if (!table->file->inited)
20521   {
20522     error= table->file->ha_index_init(table_ref->key, tab ? tab->sorted : TRUE);
20523     if (unlikely(error))
20524     {
20525       (void) report_error(table, error);
20526       return 1;
20527     }
20528   }
20529 
20530   /*
20531     The following is needed when one makes ref (or eq_ref) access from row
20532     comparisons: one must call row->bring_value() to get the new values.
20533   */
20534   if (tab && tab->bush_children)
20535   {
20536     TABLE_LIST *emb_sj_nest= tab->bush_children->start->emb_sj_nest;
20537     emb_sj_nest->sj_subq_pred->left_expr->bring_value();
20538   }
20539 
20540   /* TODO: Why don't we do "Late NULLs Filtering" here? */
20541 
20542   if (cmp_buffer_with_ref(thd, table, table_ref) ||
20543       (table->status & (STATUS_GARBAGE | STATUS_NO_PARENT | STATUS_NULL_ROW)))
20544   {
20545     if (table_ref->key_err)
20546     {
20547       table->status=STATUS_NOT_FOUND;
20548       return -1;
20549     }
20550     /*
20551       Moving away from the current record. Unlock the row
20552       in the handler if it did not match the partial WHERE.
20553     */
20554     if (tab && tab->ref.has_record && tab->ref.use_count == 0)
20555     {
20556       tab->read_record.table->file->unlock_row();
20557       table_ref->has_record= FALSE;
20558     }
20559     error=table->file->ha_index_read_map(table->record[0],
20560                                   table_ref->key_buff,
20561                                   make_prev_keypart_map(table_ref->key_parts),
20562                                   HA_READ_KEY_EXACT);
20563     if (unlikely(error) &&
20564         error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
20565       return report_error(table, error);
20566 
20567     if (likely(!error))
20568     {
20569       table_ref->has_record= TRUE;
20570       table_ref->use_count= 1;
20571     }
20572   }
20573   else if (table->status == 0)
20574   {
20575     DBUG_ASSERT(table_ref->has_record);
20576     table_ref->use_count++;
20577   }
20578   table->null_row=0;
20579   return table->status ? -1 : 0;
20580 }
20581 
20582 
20583 /**
20584   Since join_read_key may buffer a record, do not unlock
20585   it if it was not used in this invocation of join_read_key().
20586   Only count locks, thus remembering if the record was left unused,
20587   and unlock already when pruning the current value of
20588   TABLE_REF buffer.
20589   @sa join_read_key()
20590 */
20591 
20592 static void
join_read_key_unlock_row(st_join_table * tab)20593 join_read_key_unlock_row(st_join_table *tab)
20594 {
20595   DBUG_ASSERT(tab->ref.use_count);
20596   if (tab->ref.use_count)
20597     tab->ref.use_count--;
20598 }
20599 
20600 /*
20601   ref access method implementation: "read_first" function
20602 
20603   SYNOPSIS
20604     join_read_always_key()
20605       tab  JOIN_TAB of the accessed table
20606 
20607   DESCRIPTION
    This is the "read_first" function for the "ref" access method.

    The function must leave the index initialized when it returns.
20611     ref_or_null access implementation depends on that.
20612 
20613   RETURN
20614     0  - Ok
20615    -1  - Row not found
20616     1  - Error
20617 */
20618 
20619 static int
join_read_always_key(JOIN_TAB * tab)20620 join_read_always_key(JOIN_TAB *tab)
20621 {
20622   int error;
20623   TABLE *table= tab->table;
20624 
20625   /* Initialize the index first */
20626   if (!table->file->inited)
20627   {
20628     if (unlikely((error= table->file->ha_index_init(tab->ref.key,
20629                                                     tab->sorted))))
20630     {
20631       (void) report_error(table, error);
20632       return 1;
20633     }
20634   }
20635 
20636   if (unlikely(cp_buffer_from_ref(tab->join->thd, table, &tab->ref)))
20637     return -1;
20638   if (unlikely((error=
20639                 table->file->prepare_index_key_scan_map(tab->ref.key_buff,
20640                                                         make_prev_keypart_map(tab->ref.key_parts)))))
20641   {
20642     report_error(table,error);
20643     return -1;
20644   }
20645   if ((error= table->file->ha_index_read_map(table->record[0],
20646                                              tab->ref.key_buff,
20647                                              make_prev_keypart_map(tab->ref.key_parts),
20648                                              HA_READ_KEY_EXACT)))
20649   {
20650     if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
20651       return report_error(table, error);
20652     return -1; /* purecov: inspected */
20653   }
20654   return 0;
20655 }
20656 
20657 
20658 /**
20659   This function is used when optimizing away ORDER BY in
20660   SELECT * FROM t1 WHERE a=1 ORDER BY a DESC,b DESC.
20661 */
20662 
20663 static int
join_read_last_key(JOIN_TAB * tab)20664 join_read_last_key(JOIN_TAB *tab)
20665 {
20666   int error;
20667   TABLE *table= tab->table;
20668 
20669   if (!table->file->inited &&
20670       unlikely((error= table->file->ha_index_init(tab->ref.key, tab->sorted))))
20671   {
20672     (void) report_error(table, error);
20673     return 1;
20674   }
20675 
20676   if (unlikely(cp_buffer_from_ref(tab->join->thd, table, &tab->ref)))
20677     return -1;
20678   if (unlikely((error=
20679                 table->file->prepare_index_key_scan_map(tab->ref.key_buff,
20680                                                         make_prev_keypart_map(tab->ref.key_parts)))) )
20681   {
20682     report_error(table,error);
20683     return -1;
20684   }
20685   if (unlikely((error=
20686                 table->file->ha_index_read_map(table->record[0],
20687                                                tab->ref.key_buff,
20688                                                make_prev_keypart_map(tab->ref.key_parts),
20689                                                HA_READ_PREFIX_LAST))))
20690   {
20691     if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
20692       return report_error(table, error);
20693     return -1; /* purecov: inspected */
20694   }
20695   return 0;
20696 }
20697 
20698 
20699 	/* ARGSUSED */
20700 static int
join_no_more_records(READ_RECORD * info)20701 join_no_more_records(READ_RECORD *info __attribute__((unused)))
20702 {
20703   return -1;
20704 }
20705 
20706 
20707 static int
join_read_next_same(READ_RECORD * info)20708 join_read_next_same(READ_RECORD *info)
20709 {
20710   int error;
20711   TABLE *table= info->table;
20712   JOIN_TAB *tab=table->reginfo.join_tab;
20713 
20714   if (unlikely((error= table->file->ha_index_next_same(table->record[0],
20715                                                        tab->ref.key_buff,
20716                                                        tab->ref.key_length))))
20717   {
20718     if (error != HA_ERR_END_OF_FILE)
20719       return report_error(table, error);
20720     table->status= STATUS_GARBAGE;
20721     return -1;
20722   }
20723   return 0;
20724 }
20725 
20726 
20727 static int
join_read_prev_same(READ_RECORD * info)20728 join_read_prev_same(READ_RECORD *info)
20729 {
20730   int error;
20731   TABLE *table= info->table;
20732   JOIN_TAB *tab=table->reginfo.join_tab;
20733 
20734   if (unlikely((error= table->file->ha_index_prev(table->record[0]))))
20735     return report_error(table, error);
20736   if (key_cmp_if_same(table, tab->ref.key_buff, tab->ref.key,
20737                       tab->ref.key_length))
20738   {
20739     table->status=STATUS_NOT_FOUND;
20740     error= -1;
20741   }
20742   return error;
20743 }
20744 
20745 
20746 static int
join_init_quick_read_record(JOIN_TAB * tab)20747 join_init_quick_read_record(JOIN_TAB *tab)
20748 {
20749   if (test_if_quick_select(tab) == -1)
20750     return -1;					/* No possible records */
20751   return join_init_read_record(tab);
20752 }
20753 
20754 
read_first_record_seq(JOIN_TAB * tab)20755 int read_first_record_seq(JOIN_TAB *tab)
20756 {
20757   if (unlikely(tab->read_record.table->file->ha_rnd_init_with_error(1)))
20758     return 1;
20759   return tab->read_record.read_record();
20760 }
20761 
20762 static int
test_if_quick_select(JOIN_TAB * tab)20763 test_if_quick_select(JOIN_TAB *tab)
20764 {
20765   DBUG_EXECUTE_IF("show_explain_probe_test_if_quick_select",
20766                   if (dbug_user_var_equals_int(tab->join->thd,
20767                                                "show_explain_probe_select_id",
20768                                                tab->join->select_lex->select_number))
20769                         dbug_serve_apcs(tab->join->thd, 1);
20770                  );
20771 
20772 
20773   delete tab->select->quick;
20774   tab->select->quick=0;
20775 
20776   if (tab->table->file->inited != handler::NONE)
20777     tab->table->file->ha_index_or_rnd_end();
20778 
20779   int res= tab->select->test_quick_select(tab->join->thd, tab->keys,
20780                                           (table_map) 0, HA_POS_ERROR, 0,
20781                                           FALSE, /*remove where parts*/FALSE);
20782   if (tab->explain_plan && tab->explain_plan->range_checked_fer)
20783     tab->explain_plan->range_checked_fer->collect_data(tab->select->quick);
20784 
20785   return res;
20786 }
20787 
20788 
20789 static
test_if_use_dynamic_range_scan(JOIN_TAB * join_tab)20790 bool test_if_use_dynamic_range_scan(JOIN_TAB *join_tab)
20791 {
20792     return (join_tab->use_quick == 2 && test_if_quick_select(join_tab) > 0);
20793 }
20794 
join_init_read_record(JOIN_TAB * tab)20795 int join_init_read_record(JOIN_TAB *tab)
20796 {
20797   /*
20798     Note: the query plan tree for the below operations is constructed in
20799     save_agg_explain_data.
20800   */
20801   if (tab->distinct && tab->remove_duplicates())  // Remove duplicates.
20802     return 1;
20803   if (tab->filesort && tab->sort_table())     // Sort table.
20804     return 1;
20805 
20806   DBUG_EXECUTE_IF("kill_join_init_read_record",
20807                   tab->join->thd->set_killed(KILL_QUERY););
20808   if (tab->select && tab->select->quick && tab->select->quick->reset())
20809   {
20810     /* Ensures error status is propagated back to client */
20811     report_error(tab->table,
20812                  tab->join->thd->killed ? HA_ERR_QUERY_INTERRUPTED : HA_ERR_OUT_OF_MEM);
20813     return 1;
20814   }
20815   /* make sure we won't get ER_QUERY_INTERRUPTED from any code below */
20816   DBUG_EXECUTE_IF("kill_join_init_read_record",
20817                   tab->join->thd->reset_killed(););
20818   if (!tab->preread_init_done  && tab->preread_init())
20819     return 1;
20820   if (init_read_record(&tab->read_record, tab->join->thd, tab->table,
20821                        tab->select, tab->filesort_result, 1,1, FALSE))
20822     return 1;
20823   return tab->read_record.read_record();
20824 }
20825 
20826 int
join_read_record_no_init(JOIN_TAB * tab)20827 join_read_record_no_init(JOIN_TAB *tab)
20828 {
20829   Copy_field *save_copy, *save_copy_end;
20830 
20831   /*
20832     init_read_record resets all elements of tab->read_record().
20833     Remember things that we don't want to have reset.
20834   */
20835   save_copy=     tab->read_record.copy_field;
20836   save_copy_end= tab->read_record.copy_field_end;
20837 
20838   init_read_record(&tab->read_record, tab->join->thd, tab->table,
20839 		   tab->select, tab->filesort_result, 1, 1, FALSE);
20840 
20841   tab->read_record.copy_field=     save_copy;
20842   tab->read_record.copy_field_end= save_copy_end;
20843   tab->read_record.read_record_func= rr_sequential_and_unpack;
20844 
20845   return tab->read_record.read_record();
20846 }
20847 
20848 
20849 /*
20850   Helper function for sorting table with filesort.
20851 */
20852 
20853 bool
sort_table()20854 JOIN_TAB::sort_table()
20855 {
20856   int rc;
20857   DBUG_PRINT("info",("Sorting for index"));
20858   THD_STAGE_INFO(join->thd, stage_creating_sort_index);
20859   DBUG_ASSERT(join->ordered_index_usage != (filesort->order == join->order ?
20860                                             JOIN::ordered_index_order_by :
20861                                             JOIN::ordered_index_group_by));
20862   rc= create_sort_index(join->thd, join, this, NULL);
20863   return (rc != 0);
20864 }
20865 
20866 
20867 static int
join_read_first(JOIN_TAB * tab)20868 join_read_first(JOIN_TAB *tab)
20869 {
20870   int error= 0;
20871   TABLE *table=tab->table;
20872   DBUG_ENTER("join_read_first");
20873 
20874   DBUG_ASSERT(table->no_keyread ||
20875               !table->covering_keys.is_set(tab->index) ||
20876               table->file->keyread == tab->index);
20877   tab->table->status=0;
20878   tab->read_record.read_record_func= join_read_next;
20879   tab->read_record.table=table;
20880   if (!table->file->inited)
20881     error= table->file->ha_index_init(tab->index, tab->sorted);
20882   if (likely(!error))
20883     error= table->file->prepare_index_scan();
20884   if (unlikely(error) ||
20885       unlikely(error= tab->table->file->ha_index_first(tab->table->record[0])))
20886   {
20887     if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
20888       report_error(table, error);
20889     DBUG_RETURN(-1);
20890   }
20891   DBUG_RETURN(0);
20892 }
20893 
20894 
20895 static int
join_read_next(READ_RECORD * info)20896 join_read_next(READ_RECORD *info)
20897 {
20898   int error;
20899   if (unlikely((error= info->table->file->ha_index_next(info->record()))))
20900     return report_error(info->table, error);
20901 
20902   return 0;
20903 }
20904 
20905 
20906 static int
join_read_last(JOIN_TAB * tab)20907 join_read_last(JOIN_TAB *tab)
20908 {
20909   TABLE *table=tab->table;
20910   int error= 0;
20911   DBUG_ENTER("join_read_last");
20912 
20913   DBUG_ASSERT(table->no_keyread ||
20914               !table->covering_keys.is_set(tab->index) ||
20915               table->file->keyread == tab->index);
20916   tab->table->status=0;
20917   tab->read_record.read_record_func= join_read_prev;
20918   tab->read_record.table=table;
20919   if (!table->file->inited)
20920     error= table->file->ha_index_init(tab->index, 1);
20921   if (likely(!error))
20922     error= table->file->prepare_index_scan();
20923   if (unlikely(error) ||
20924       unlikely(error= tab->table->file->ha_index_last(tab->table->record[0])))
20925     DBUG_RETURN(report_error(table, error));
20926 
20927   DBUG_RETURN(0);
20928 }
20929 
20930 
20931 static int
join_read_prev(READ_RECORD * info)20932 join_read_prev(READ_RECORD *info)
20933 {
20934   int error;
20935   if (unlikely((error= info->table->file->ha_index_prev(info->record()))))
20936     return report_error(info->table, error);
20937   return 0;
20938 }
20939 
20940 
20941 static int
join_ft_read_first(JOIN_TAB * tab)20942 join_ft_read_first(JOIN_TAB *tab)
20943 {
20944   int error;
20945   TABLE *table= tab->table;
20946 
20947   if (!table->file->inited &&
20948       (error= table->file->ha_index_init(tab->ref.key, 1)))
20949   {
20950     (void) report_error(table, error);
20951     return 1;
20952   }
20953 
20954   table->file->ft_init();
20955 
20956   if (unlikely((error= table->file->ha_ft_read(table->record[0]))))
20957     return report_error(table, error);
20958   return 0;
20959 }
20960 
20961 static int
join_ft_read_next(READ_RECORD * info)20962 join_ft_read_next(READ_RECORD *info)
20963 {
20964   int error;
20965   if (unlikely((error= info->table->file->ha_ft_read(info->record()))))
20966     return report_error(info->table, error);
20967   return 0;
20968 }
20969 
20970 
20971 /**
20972   Reading of key with key reference and one part that may be NULL.
20973 */
20974 
20975 int
join_read_always_key_or_null(JOIN_TAB * tab)20976 join_read_always_key_or_null(JOIN_TAB *tab)
20977 {
20978   int res;
20979 
20980   /* First read according to key which is NOT NULL */
20981   *tab->ref.null_ref_key= 0;			// Clear null byte
20982   if ((res= join_read_always_key(tab)) >= 0)
20983     return res;
20984 
20985   /* Then read key with null value */
20986   *tab->ref.null_ref_key= 1;			// Set null byte
20987   return safe_index_read(tab);
20988 }
20989 
20990 
20991 int
join_read_next_same_or_null(READ_RECORD * info)20992 join_read_next_same_or_null(READ_RECORD *info)
20993 {
20994   int error;
20995   if (unlikely((error= join_read_next_same(info)) >= 0))
20996     return error;
20997   JOIN_TAB *tab= info->table->reginfo.join_tab;
20998 
20999   /* Test if we have already done a read after null key */
21000   if (*tab->ref.null_ref_key)
21001     return -1;					// All keys read
21002   *tab->ref.null_ref_key= 1;			// Set null byte
21003   return safe_index_read(tab);			// then read null keys
21004 }
21005 
21006 
21007 /*****************************************************************************
21008   DESCRIPTION
21009     Functions that end one nested loop iteration. Different functions
21010     are used to support GROUP BY clause and to redirect records
21011     to a table (e.g. in case of SELECT into a temporary table) or to the
21012     network client.
21013 
21014   RETURN VALUES
21015     NESTED_LOOP_OK           - the record has been successfully handled
21016     NESTED_LOOP_ERROR        - a fatal error (like table corruption)
21017                                was detected
21018     NESTED_LOOP_KILLED       - thread shutdown was requested while processing
21019                                the record
21020     NESTED_LOOP_QUERY_LIMIT  - the record has been successfully handled;
21021                                additionally, the nested loop produced the
21022                                number of rows specified in the LIMIT clause
21023                                for the query
21024     NESTED_LOOP_CURSOR_LIMIT - the record has been successfully handled;
21025                                additionally, there is a cursor and the nested
21026                                loop algorithm produced the number of rows
21027                                that is specified for current cursor fetch
21028                                operation.
21029    All return values except NESTED_LOOP_OK abort the nested loop.
21030 *****************************************************************************/
21031 
/*
  End-of-iteration function that passes the current row to join->result
  (or to join->procedure when one is set) and enforces the row limits.

  join            the JOIN being executed
  join_tab        NULL, or a join tab other than the first one; the fields
                  to send are taken from (join_tab-1)->fields, or from
                  join->fields when join_tab is NULL
  end_of_records  true when called after the last row; then only
                  join->procedure->end_of_records() is invoked (if a
                  procedure is set)

  Returns one of the NESTED_LOOP_* states.
*/
/* ARGSUSED */
static enum_nested_loop_state
end_send(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
	 bool end_of_records)
{
  DBUG_ENTER("end_send");
  /*
    When all tables are const this function is called with jointab == NULL.
    This function shouldn't be called for the first join_tab as it needs
    to get fields from previous tab.
  */
  DBUG_ASSERT(join_tab == NULL || join_tab != join->join_tab);
  //TODO pass fields via argument
  List<Item> *fields= join_tab ? (join_tab-1)->fields : join->fields;

  if (!end_of_records)
  {
    if (join->table_count &&
        join->join_tab->is_using_loose_index_scan())
    {
      /* Copy non-aggregated fields when loose index scan is used. */
      copy_fields(&join->tmp_table_param);
    }
    if (join->having && join->having->val_int() == 0)
      DBUG_RETURN(NESTED_LOOP_OK);               // Didn't match having
    /* With a procedure the row goes to it and the limit logic is skipped */
    if (join->procedure)
    {
      if (join->procedure->send_row(join->procedure_fields_list))
        DBUG_RETURN(NESTED_LOOP_ERROR);
      DBUG_RETURN(NESTED_LOOP_OK);
    }
    if (join->do_send_rows)
    {
      int error;
      /* result < 0 if row was not accepted and should not be counted */
      if (unlikely((error= join->result->send_data(*fields))))
      {
        if (error > 0)
          DBUG_RETURN(NESTED_LOOP_ERROR);
        // error < 0 => duplicate row
        join->duplicate_rows++;
      }
    }

    /* send_records is incremented even when do_send_rows is off */
    ++join->send_records;
    if (join->send_records >= join->unit->select_limit_cnt &&
        !join->do_send_rows)
    {
      /*
        If we have used Priority Queue for optimizing order by with limit,
        then stop here, there are no more records to consume.
        When this optimization is used, end_send is called on the next
        join_tab.
      */
      if (join->order &&
          join->select_options & OPTION_FOUND_ROWS &&
          join_tab > join->join_tab &&
          (join_tab - 1)->filesort && (join_tab - 1)->filesort->using_pq)
      {
        DBUG_PRINT("info", ("filesort NESTED_LOOP_QUERY_LIMIT"));
        DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT);
      }
    }
    if (join->send_records >= join->unit->select_limit_cnt &&
	join->do_send_rows)
    {
      if (join->select_options & OPTION_FOUND_ROWS)
      {
	JOIN_TAB *jt=join->join_tab;
	/*
	  Single table, no grouping, no HAVING, no condition, no quick
	  select, no ref key and exact handler statistics: the total row
	  count is taken directly instead of reading the remaining rows.
	*/
	if ((join->table_count == 1) && !join->sort_and_group
	    && !join->send_group_parts && !join->having && !jt->select_cond &&
	    !(jt->select && jt->select->quick) &&
	    (jt->table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) &&
            (jt->ref.key < 0))
	{
	  /* Join over all rows in table;  Return number of found rows */
	  TABLE *table=jt->table;

	  if (jt->filesort_result)                     // If filesort was used
	  {
	    join->send_records= jt->filesort_result->found_rows;
	  }
	  else
	  {
	    table->file->info(HA_STATUS_VARIABLE);
	    join->send_records= table->file->stats.records;
	  }
	}
	else
	{
	  /*
	    Stop sending rows but let the nested loop continue
	    (NESTED_LOOP_OK), so send_records keeps being incremented.
	  */
	  join->do_send_rows= 0;
	  if (join->unit->fake_select_lex)
	    join->unit->fake_select_lex->select_limit= 0;
	  DBUG_RETURN(NESTED_LOOP_OK);
	}
      }
      DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT);      // Abort nicely
    }
    else if (join->send_records >= join->fetch_limit)
    {
      /*
        There is a server side cursor and all rows for
        this fetch request are sent.
      */
      DBUG_RETURN(NESTED_LOOP_CURSOR_LIMIT);
    }
  }
  else
  {
    if (join->procedure && join->procedure->end_of_records())
      DBUG_RETURN(NESTED_LOOP_ERROR);
  }
  DBUG_RETURN(NESTED_LOOP_OK);
}
21146 
21147 
/*
  @brief
    Perform a GROUP BY operation over a stream of rows ordered by their group. The
    result is sent into join->result.

  @detail
    Also applies HAVING, etc.

    A group is finished when there is no first record yet, when
    end_of_records is true, or when test_if_group_changed() returns an
    index >= 0.  The finished group is then sent (to join->procedure or
    join->result), rollup data is sent when rollup is active, and the
    sum functions are re-initialized for the next group.  Otherwise the
    current row is only accumulated with update_sum_func().

  @param join            the JOIN being executed
  @param join_tab        if not NULL, fields are taken from
                         (join_tab-1)->fields, otherwise from join->fields
  @param end_of_records  true when called after the last row

  @return one of the NESTED_LOOP_* states
*/

enum_nested_loop_state
end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
	       bool end_of_records)
{
  /* idx stays -1 unless test_if_group_changed() is called below */
  int idx= -1;
  enum_nested_loop_state ok_code= NESTED_LOOP_OK;
  List<Item> *fields= join_tab ? (join_tab-1)->fields : join->fields;
  DBUG_ENTER("end_send_group");

  /* Switch to the items3 ref array once (guarded by set_group_rpa) */
  if (!join->items3.is_null() && !join->set_group_rpa)
  {
    join->set_group_rpa= true;
    join->set_items_ref_array(join->items3);
  }

  if (!join->first_record || end_of_records ||
      (idx=test_if_group_changed(join->group_fields)) >= 0)
  {
    if (!join->group_sent &&
        (join->first_record ||
         (end_of_records && !join->group && !join->group_optimized_away)))
    {
      if (join->procedure)
	join->procedure->end_group();
      if (idx < (int) join->send_group_parts)
      {
	/* error: > 0 fatal, < 0 row rejected by HAVING, 0 ok */
	int error=0;
	if (join->procedure)
	{
	  if (join->having && join->having->val_int() == 0)
	    error= -1;				// Didn't satisfy having
 	  else
	  {
	    if (join->do_send_rows)
	      error=join->procedure->send_row(*fields) ? 1 : 0;
	    join->send_records++;
	  }
	  if (end_of_records && join->procedure->end_of_records())
	    error= 1;				// Fatal error
	}
	else
	{
	  if (!join->first_record)
	  {
            List_iterator_fast<Item> it(*join->fields);
            Item *item;
            /* No matching rows for group function */
            join->clear();

            while ((item= it++))
              item->no_rows_in_result();
	  }
	  if (join->having && join->having->val_int() == 0)
	    error= -1;				// Didn't satisfy having
	  else
	  {
	    if (join->do_send_rows)
            {
	      error=join->result->send_data(*fields);
              if (unlikely(error < 0))
              {
                /* Duplicate row, don't count */
                join->duplicate_rows++;
                error= 0;
              }
            }
	    join->send_records++;
            join->group_sent= true;
	  }
	  /* Rollup rows are sent unless a fatal error occurred above */
	  if (unlikely(join->rollup.state != ROLLUP::STATE_NONE && error <= 0))
	  {
	    if (join->rollup_send_data((uint) (idx+1)))
	      error= 1;
	  }
	}
	if (unlikely(error > 0))
          DBUG_RETURN(NESTED_LOOP_ERROR);        /* purecov: inspected */
	if (end_of_records)
	  DBUG_RETURN(NESTED_LOOP_OK);
	if (join->send_records >= join->unit->select_limit_cnt &&
	    join->do_send_rows)
	{
	  if (!(join->select_options & OPTION_FOUND_ROWS))
	    DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT); // Abort nicely
	  /* OPTION_FOUND_ROWS: stop sending and lift the limit */
	  join->do_send_rows=0;
	  join->unit->select_limit_cnt = HA_POS_ERROR;
        }
        else if (join->send_records >= join->fetch_limit)
        {
          /*
            There is a server side cursor and all rows
            for this fetch request are sent.
          */
          /*
            Preventing code duplication. When finished with the group reset
            the group functions and copy_fields. We fall through. bug #11904
          */
          ok_code= NESTED_LOOP_CURSOR_LIMIT;
        }
      }
    }
    else
    {
      if (end_of_records)
	DBUG_RETURN(NESTED_LOOP_OK);
      join->first_record=1;
      /* Result is ignored; called only for its effect on group_fields */
      (void) test_if_group_changed(join->group_fields);
    }
    if (idx < (int) join->send_group_parts)
    {
      /*
        This branch is executed also for cursors which have finished their
        fetch limit - the reason for ok_code.
      */
      copy_fields(&join->tmp_table_param);
      if (init_sum_functions(join->sum_funcs, join->sum_funcs_end[idx+1]))
	DBUG_RETURN(NESTED_LOOP_ERROR);
      if (join->procedure)
	join->procedure->add();
      join->group_sent= false;
      DBUG_RETURN(ok_code);
    }
  }
  /* Same group as before: accumulate the row into the sum functions */
  if (update_sum_func(join->sum_funcs))
    DBUG_RETURN(NESTED_LOOP_ERROR);
  if (join->procedure)
    join->procedure->add();
  DBUG_RETURN(NESTED_LOOP_OK);
}
21286 
21287 
21288 	/* ARGSUSED */
/*
  End-of-iteration function that writes the current row into the
  temporary table join_tab->table.

  Fields and functions are copied into the temporary table record first;
  the row is written only when join_tab->having is absent or evaluates
  to non-zero.  Returns one of the NESTED_LOOP_* states; the kill flag of
  the thread is checked before returning NESTED_LOOP_OK.

  Note: join_tab is dereferenced here even though the parameter carries
  the "unused" attribute.
*/
static enum_nested_loop_state
end_write(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
	  bool end_of_records)
{
  TABLE *const table= join_tab->table;
  DBUG_ENTER("end_write");

  if (!end_of_records)
  {
    copy_fields(join_tab->tmp_table_param);
    if (copy_funcs(join_tab->tmp_table_param->items_to_copy, join->thd))
      DBUG_RETURN(NESTED_LOOP_ERROR);           /* purecov: inspected */

    if (likely(!join_tab->having || join_tab->having->val_int()))
    {
      int error;
      join->found_records++;
      if ((error= table->file->ha_write_tmp_row(table->record[0])))
      {
        /* Errors that are not fatal under HA_CHECK_DUP are skipped */
        if (likely(!table->file->is_fatal_error(error, HA_CHECK_DUP)))
	  goto end;                             // Ignore duplicate keys
        bool is_duplicate;
	if (create_internal_tmp_table_from_heap(join->thd, table,
                                                join_tab->tmp_table_param->start_recinfo,
                                                &join_tab->tmp_table_param->recinfo,
                                                error, 1, &is_duplicate))
	  DBUG_RETURN(NESTED_LOOP_ERROR);        // Not a table_is_full error
        if (is_duplicate)
          goto end;
	table->s->uniques=0;			// To ensure rows are the same
      }
      /* Row was stored: count it and check the write limit */
      if (++join_tab->send_records >=
            join_tab->tmp_table_param->end_write_records &&
	  join->do_send_rows)
      {
	if (!(join->select_options & OPTION_FOUND_ROWS))
	  DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT);
	/* OPTION_FOUND_ROWS: stop sending and lift the limit */
	join->do_send_rows=0;
	join->unit->select_limit_cnt = HA_POS_ERROR;
      }
    }
  }
end:
  if (unlikely(join->thd->check_killed()))
  {
    DBUG_RETURN(NESTED_LOOP_KILLED);             /* purecov: inspected */
  }
  DBUG_RETURN(NESTED_LOOP_OK);
}
21338 
21339 
/*
  @brief
    Perform a GROUP BY operation over rows coming in arbitrary order.

    This is done by looking up the group in a temp.table and updating group
    values.

  @detail
    Also applies HAVING, etc.

    The group key is built in join_tab->tmp_table_param->group_buff and
    looked up with ha_index_read_map().  When the lookup succeeds the
    existing row is updated with the new sum function values; otherwise a
    new row is written.  If that write fails and
    create_internal_tmp_table_from_heap() succeeds, the write function of
    join_tab->aggr is switched to end_unique_update.

  @param join            the JOIN being executed
  @param join_tab        join tab whose temporary table holds the groups
  @param end_of_records  true when called after the last row; nothing is
                         done in that case

  @return one of the NESTED_LOOP_* states
*/

static enum_nested_loop_state
end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
	   bool end_of_records)
{
  TABLE *const table= join_tab->table;
  ORDER   *group;
  int	  error;
  DBUG_ENTER("end_update");

  if (end_of_records)
    DBUG_RETURN(NESTED_LOOP_OK);

  join->found_records++;
  copy_fields(join_tab->tmp_table_param);	// Groups are copied twice.
  /* Make a key of group index */
  for (group=table->group ; group ; group=group->next)
  {
    Item *item= *group->item;
    /* (Re)create the fast copier when the target field has changed */
    if (group->fast_field_copier_setup != group->field)
    {
      DBUG_PRINT("info", ("new setup %p -> %p",
                          group->fast_field_copier_setup,
                          group->field));
      group->fast_field_copier_setup= group->field;
      group->fast_field_copier_func=
        item->setup_fast_field_copier(group->field);
    }
    item->save_org_in_field(group->field, group->fast_field_copier_func);
    /* Store in the used key if the field was 0 */
    if (item->maybe_null)
      group->buff[-1]= (char) group->field->is_null();
  }
  /* A zero result means a row for this group already exists */
  if (!table->file->ha_index_read_map(table->record[1],
                                      join_tab->tmp_table_param->group_buff,
                                      HA_WHOLE_KEY,
                                      HA_READ_KEY_EXACT))
  {						/* Update old record */
    restore_record(table,record[1]);
    update_tmptable_sum_func(join->sum_funcs,table);
    if (unlikely((error= table->file->ha_update_tmp_row(table->record[1],
                                                        table->record[0]))))
    {
      table->file->print_error(error,MYF(0));	/* purecov: inspected */
      DBUG_RETURN(NESTED_LOOP_ERROR);            /* purecov: inspected */
    }
    /* Existing group updated: send_records is not incremented */
    goto end;
  }

  /* No row for this group yet: write a new one */
  init_tmptable_sum_functions(join->sum_funcs);
  if (unlikely(copy_funcs(join_tab->tmp_table_param->items_to_copy,
                          join->thd)))
    DBUG_RETURN(NESTED_LOOP_ERROR);           /* purecov: inspected */
  if (unlikely((error= table->file->ha_write_tmp_row(table->record[0]))))
  {
    if (create_internal_tmp_table_from_heap(join->thd, table,
                                       join_tab->tmp_table_param->start_recinfo,
                                            &join_tab->tmp_table_param->recinfo,
                                            error, 0, NULL))
      DBUG_RETURN(NESTED_LOOP_ERROR);            // Not a table_is_full error
    /* Change method to update rows */
    if (unlikely((error= table->file->ha_index_init(0, 0))))
    {
      table->file->print_error(error, MYF(0));
      DBUG_RETURN(NESTED_LOOP_ERROR);
    }

    join_tab->aggr->set_write_func(end_unique_update);
  }
  join_tab->send_records++;
end:
  if (unlikely(join->thd->check_killed()))
  {
    DBUG_RETURN(NESTED_LOOP_KILLED);             /* purecov: inspected */
  }
  DBUG_RETURN(NESTED_LOOP_OK);
}
21427 
21428 
21429 /** Like end_update, but this is done with unique constraints instead of keys.  */
21430 
21431 static enum_nested_loop_state
end_unique_update(JOIN * join,JOIN_TAB * join_tab,bool end_of_records)21432 end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
21433 		  bool end_of_records)
21434 {
21435   TABLE *table= join_tab->table;
21436   int	  error;
21437   DBUG_ENTER("end_unique_update");
21438 
21439   if (end_of_records)
21440     DBUG_RETURN(NESTED_LOOP_OK);
21441 
21442   init_tmptable_sum_functions(join->sum_funcs);
21443   copy_fields(join_tab->tmp_table_param);		// Groups are copied twice.
21444   if (copy_funcs(join_tab->tmp_table_param->items_to_copy, join->thd))
21445     DBUG_RETURN(NESTED_LOOP_ERROR);           /* purecov: inspected */
21446 
21447   if (likely(!(error= table->file->ha_write_tmp_row(table->record[0]))))
21448     join_tab->send_records++;			// New group
21449   else
21450   {
21451     if (unlikely((int) table->file->get_dup_key(error) < 0))
21452     {
21453       table->file->print_error(error,MYF(0));	/* purecov: inspected */
21454       DBUG_RETURN(NESTED_LOOP_ERROR);            /* purecov: inspected */
21455     }
21456     /* Prepare table for random positioning */
21457     bool rnd_inited= (table->file->inited == handler::RND);
21458     if (!rnd_inited &&
21459         ((error= table->file->ha_index_end()) ||
21460          (error= table->file->ha_rnd_init(0))))
21461     {
21462       table->file->print_error(error, MYF(0));
21463       DBUG_RETURN(NESTED_LOOP_ERROR);
21464     }
21465     if (unlikely(table->file->ha_rnd_pos(table->record[1],table->file->dup_ref)))
21466     {
21467       table->file->print_error(error,MYF(0));	/* purecov: inspected */
21468       DBUG_RETURN(NESTED_LOOP_ERROR);            /* purecov: inspected */
21469     }
21470     restore_record(table,record[1]);
21471     update_tmptable_sum_func(join->sum_funcs,table);
21472     if (unlikely((error= table->file->ha_update_tmp_row(table->record[1],
21473                                                         table->record[0]))))
21474     {
21475       table->file->print_error(error,MYF(0));	/* purecov: inspected */
21476       DBUG_RETURN(NESTED_LOOP_ERROR);            /* purecov: inspected */
21477     }
21478     if (!rnd_inited &&
21479         ((error= table->file->ha_rnd_end()) ||
21480          (error= table->file->ha_index_init(0, 0))))
21481     {
21482       table->file->print_error(error, MYF(0));
21483       DBUG_RETURN(NESTED_LOOP_ERROR);
21484     }
21485   }
21486   if (unlikely(join->thd->check_killed()))
21487   {
21488     DBUG_RETURN(NESTED_LOOP_KILLED);             /* purecov: inspected */
21489   }
21490   DBUG_RETURN(NESTED_LOOP_OK);
21491 }
21492 
21493 
21494 /*
21495   @brief
21496     Perform a GROUP BY operation over a stream of rows ordered by their group.
21497     Write the result into a temporary table.
21498 
21499   @detail
21500     Also applies HAVING, etc.
21501 
21502     The rows are written into temptable so e.g. filesort can read them.
21503 */
21504 
21505 enum_nested_loop_state
end_write_group(JOIN * join,JOIN_TAB * join_tab,bool end_of_records)21506 end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
21507 		bool end_of_records)
21508 {
21509   TABLE *table= join_tab->table;
21510   int	  idx= -1;
21511   DBUG_ENTER("end_write_group");
21512 
21513   if (!join->first_record || end_of_records ||
21514       (idx=test_if_group_changed(join->group_fields)) >= 0)
21515   {
21516     if (join->first_record || (end_of_records && !join->group))
21517     {
21518       if (join->procedure)
21519 	join->procedure->end_group();
21520       int send_group_parts= join->send_group_parts;
21521       if (idx < send_group_parts)
21522       {
21523         if (!join->first_record)
21524         {
21525           /* No matching rows for group function */
21526           join->clear();
21527         }
21528         copy_sum_funcs(join->sum_funcs,
21529                        join->sum_funcs_end[send_group_parts]);
21530 	if (!join_tab->having || join_tab->having->val_int())
21531 	{
21532           int error= table->file->ha_write_tmp_row(table->record[0]);
21533           if (unlikely(error) &&
21534               create_internal_tmp_table_from_heap(join->thd, table,
21535                                           join_tab->tmp_table_param->start_recinfo,
21536                                           &join_tab->tmp_table_param->recinfo,
21537                                                    error, 0, NULL))
21538 	    DBUG_RETURN(NESTED_LOOP_ERROR);
21539         }
21540         if (unlikely(join->rollup.state != ROLLUP::STATE_NONE))
21541 	{
21542           if (unlikely(join->rollup_write_data((uint) (idx+1),
21543                                                join_tab->tmp_table_param,
21544                                                table)))
21545           {
21546 	    DBUG_RETURN(NESTED_LOOP_ERROR);
21547           }
21548 	}
21549 	if (end_of_records)
21550 	  goto end;
21551       }
21552     }
21553     else
21554     {
21555       if (end_of_records)
21556         goto end;
21557       join->first_record=1;
21558       (void) test_if_group_changed(join->group_fields);
21559     }
21560     if (idx < (int) join->send_group_parts)
21561     {
21562       copy_fields(join_tab->tmp_table_param);
21563       if (unlikely(copy_funcs(join_tab->tmp_table_param->items_to_copy,
21564                               join->thd)))
21565 	DBUG_RETURN(NESTED_LOOP_ERROR);
21566       if (unlikely(init_sum_functions(join->sum_funcs,
21567                                       join->sum_funcs_end[idx+1])))
21568 	DBUG_RETURN(NESTED_LOOP_ERROR);
21569       if (unlikely(join->procedure))
21570 	join->procedure->add();
21571       goto end;
21572     }
21573   }
21574   if (unlikely(update_sum_func(join->sum_funcs)))
21575     DBUG_RETURN(NESTED_LOOP_ERROR);
21576   if (unlikely(join->procedure))
21577     join->procedure->add();
21578 end:
21579   if (unlikely(join->thd->check_killed()))
21580   {
21581     DBUG_RETURN(NESTED_LOOP_KILLED);             /* purecov: inspected */
21582   }
21583   DBUG_RETURN(NESTED_LOOP_OK);
21584 }
21585 
21586 
/*****************************************************************************
  Remove calculation with tables that aren't yet read. Also remove tests
  against fields that are read through a key where the table is not an
  outer join table.
  We can't remove tests that are made against columns which are stored
  in sorted order.
*****************************************************************************/
21594 
21595 /**
21596   Check if "left_item=right_item" equality is guaranteed to be true by use of
21597   [eq]ref access on left_item->field->table.
21598 
21599   SYNOPSIS
21600     test_if_ref()
21601       root_cond
21602       left_item
21603       right_item
21604 
21605   DESCRIPTION
21606     Check if the given "left_item = right_item" equality is guaranteed to be
21607     true by use of [eq_]ref access method.
21608 
21609     We need root_cond as we can't remove ON expressions even if employed ref
21610     access guarantees that they are true. This is because  TODO
21611 
21612   RETURN
21613     TRUE   if right_item is used removable reference key on left_item
21614     FALSE  Otherwise
21615 
21616 */
21617 
test_if_ref(Item * root_cond,Item_field * left_item,Item * right_item)21618 bool test_if_ref(Item *root_cond, Item_field *left_item,Item *right_item)
21619 {
21620   Field *field=left_item->field;
21621   JOIN_TAB *join_tab= field->table->reginfo.join_tab;
21622   // No need to change const test
21623   if (!field->table->const_table && join_tab &&
21624       !join_tab->is_ref_for_hash_join() &&
21625       (!join_tab->first_inner ||
21626        *join_tab->first_inner->on_expr_ref == root_cond))
21627   {
21628     /*
21629       If ref access uses "Full scan on NULL key" (i.e. it actually alternates
21630       between ref access and full table scan), then no equality can be
21631       guaranteed to be true.
21632     */
21633     if (join_tab->ref.is_access_triggered())
21634       return FALSE;
21635 
21636     Item *ref_item=part_of_refkey(field->table,field);
21637     if (ref_item && (ref_item->eq(right_item,1) ||
21638 		     ref_item->real_item()->eq(right_item,1)))
21639     {
21640       right_item= right_item->real_item();
21641       if (right_item->type() == Item::FIELD_ITEM)
21642 	return (field->eq_def(((Item_field *) right_item)->field));
21643       /* remove equalities injected by IN->EXISTS transformation */
21644       else if (right_item->type() == Item::CACHE_ITEM)
21645         return ((Item_cache *)right_item)->eq_def (field);
21646       if (right_item->const_item() && !(right_item->is_null()))
21647       {
21648 	/*
21649 	  We can remove binary fields and numerical fields except float,
21650 	  as float comparison isn't 100 % safe
21651 	  We have to keep normal strings to be able to check for end spaces
21652 	*/
21653 	if (field->binary() &&
21654 	    field->real_type() != MYSQL_TYPE_STRING &&
21655 	    field->real_type() != MYSQL_TYPE_VARCHAR &&
21656 	    (field->type() != MYSQL_TYPE_FLOAT || field->decimals() == 0))
21657 	{
21658 	  return !right_item->save_in_field_no_warnings(field, 1);
21659 	}
21660       }
21661     }
21662   }
21663   return 0;					// keep test
21664 }
21665 
21666 
21667 /**
21668    Extract a condition that can be checked after reading given table
21669    @fn make_cond_for_table()
21670 
21671    @param cond       Condition to analyze
21672    @param tables     Tables for which "current field values" are available
21673    @param used_table Table that we're extracting the condition for
21674       tables       Tables for which "current field values" are available (this
21675                    includes used_table)
21676                    (may  also include PSEUDO_TABLE_BITS, and may be zero)
21677    @param join_tab_idx_arg
21678 		     The index of the JOIN_TAB this Item is being extracted
21679                      for. MAX_TABLES if there is no corresponding JOIN_TAB.
21680    @param exclude_expensive_cond
21681 		     Do not push expensive conditions
21682    @param retain_ref_cond
21683                      Retain ref conditions
21684 
21685    @retval <>NULL Generated condition
21686    @retval =NULL  Already checked, OR error
21687 
21688    @details
21689      Extract the condition that can be checked after reading the table
21690      specified in 'used_table', given that current-field values for tables
21691      specified in 'tables' bitmap are available.
21692      If 'used_table' is 0
21693      - extract conditions for all tables in 'tables'.
21694      - extract conditions are unrelated to any tables
21695        in the same query block/level(i.e. conditions
21696        which have used_tables == 0).
21697 
21698      The function assumes that
21699      - Constant parts of the condition has already been checked.
21700      - Condition that could be checked for tables in 'tables' has already
21701      been checked.
21702 
21703      The function takes into account that some parts of the condition are
21704      guaranteed to be true by employed 'ref' access methods (the code that
21705      does this is located at the end, search down for "EQ_FUNC").
21706 
21707    @note
21708      Make sure to keep the implementations of make_cond_for_table() and
21709      make_cond_after_sjm() synchronized.
21710      make_cond_for_info_schema() uses similar algorithm as well.
21711 */
21712 
21713 static Item *
make_cond_for_table(THD * thd,Item * cond,table_map tables,table_map used_table,int join_tab_idx_arg,bool exclude_expensive_cond,bool retain_ref_cond)21714 make_cond_for_table(THD *thd, Item *cond, table_map tables,
21715                     table_map used_table,
21716                     int join_tab_idx_arg,
21717                     bool exclude_expensive_cond __attribute__((unused)),
21718 		    bool retain_ref_cond)
21719 {
21720   return make_cond_for_table_from_pred(thd, cond, cond, tables, used_table,
21721                                        join_tab_idx_arg,
21722                                        exclude_expensive_cond,
21723                                        retain_ref_cond, true);
21724 }
21725 
21726 
21727 static Item *
make_cond_for_table_from_pred(THD * thd,Item * root_cond,Item * cond,table_map tables,table_map used_table,int join_tab_idx_arg,bool exclude_expensive_cond,bool retain_ref_cond,bool is_top_and_level)21728 make_cond_for_table_from_pred(THD *thd, Item *root_cond, Item *cond,
21729                               table_map tables, table_map used_table,
21730                               int join_tab_idx_arg,
21731                               bool exclude_expensive_cond __attribute__
21732                               ((unused)),
21733                               bool retain_ref_cond,
21734                               bool is_top_and_level)
21735 
21736 {
21737   table_map rand_table_bit= (table_map) RAND_TABLE_BIT;
21738 
21739   if (used_table && !(cond->used_tables() & used_table))
21740     return (COND*) 0;				// Already checked
21741 
21742   if (cond->type() == Item::COND_ITEM)
21743   {
21744     if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
21745     {
21746       /* Create new top level AND item */
21747       Item_cond_and *new_cond=new (thd->mem_root) Item_cond_and(thd);
21748       if (!new_cond)
21749 	return (COND*) 0;			// OOM /* purecov: inspected */
21750       List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
21751       Item *item;
21752       while ((item=li++))
21753       {
21754         /*
21755           Special handling of top level conjuncts with RAND_TABLE_BIT:
21756           if such a conjunct contains a reference to a field that is not
21757           an outer field then it is pushed to the corresponding table by
21758           the same rule as all other conjuncts. Otherwise, if the conjunct
21759           is used in WHERE is is pushed to the last joined table, if is it
21760           is used in ON condition of an outer join it is pushed into the
21761           last inner table of the outer join. Such conjuncts are pushed in
21762           a call of make_cond_for_table_from_pred() with the
21763           parameter 'used_table' equal to PSEUDO_TABLE_BITS.
21764         */
21765         if (is_top_and_level && used_table == rand_table_bit &&
21766             (item->used_tables() & ~OUTER_REF_TABLE_BIT) != rand_table_bit)
21767         {
21768           /* The conjunct with RAND_TABLE_BIT has been allready pushed */
21769           continue;
21770         }
21771 	Item *fix=make_cond_for_table_from_pred(thd, root_cond, item,
21772                                                 tables, used_table,
21773                                                 join_tab_idx_arg,
21774                                                 exclude_expensive_cond,
21775                                                 retain_ref_cond, false);
21776 	if (fix)
21777 	  new_cond->argument_list()->push_back(fix, thd->mem_root);
21778       }
21779       switch (new_cond->argument_list()->elements) {
21780       case 0:
21781 	return (COND*) 0;			// Always true
21782       case 1:
21783 	return new_cond->argument_list()->head();
21784       default:
21785 	/*
21786           Call fix_fields to propagate all properties of the children to
21787           the new parent Item. This should not be expensive because all
21788 	  children of Item_cond_and should be fixed by now.
21789 	*/
21790 	if (new_cond->fix_fields(thd, 0))
21791           return (COND*) 0;
21792 	new_cond->used_tables_cache=
21793 	  ((Item_cond_and*) cond)->used_tables_cache &
21794 	  tables;
21795 	return new_cond;
21796       }
21797     }
21798     else
21799     {						// Or list
21800       if (is_top_and_level && used_table == rand_table_bit &&
21801           (cond->used_tables() & ~OUTER_REF_TABLE_BIT) != rand_table_bit)
21802       {
21803         /* This top level formula with RAND_TABLE_BIT has been already pushed */
21804         return (COND*) 0;
21805       }
21806 
21807       Item_cond_or *new_cond=new (thd->mem_root) Item_cond_or(thd);
21808       if (!new_cond)
21809 	return (COND*) 0;			// OOM /* purecov: inspected */
21810       List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
21811       Item *item;
21812       while ((item=li++))
21813       {
21814 	Item *fix=make_cond_for_table_from_pred(thd, root_cond, item,
21815                                                 tables, 0L,
21816                                                 join_tab_idx_arg,
21817                                                 exclude_expensive_cond,
21818                                                 retain_ref_cond, false);
21819 	if (!fix)
21820 	  return (COND*) 0;			// Always true
21821 	new_cond->argument_list()->push_back(fix, thd->mem_root);
21822       }
21823       /*
21824         Call fix_fields to propagate all properties of the children to
21825         the new parent Item. This should not be expensive because all
21826         children of Item_cond_and should be fixed by now.
21827       */
21828       new_cond->fix_fields(thd, 0);
21829       new_cond->used_tables_cache= ((Item_cond_or*) cond)->used_tables_cache;
21830       new_cond->top_level_item();
21831       return new_cond;
21832     }
21833   }
21834 
21835   if (is_top_and_level && used_table == rand_table_bit &&
21836       (cond->used_tables() & ~OUTER_REF_TABLE_BIT) != rand_table_bit)
21837   {
21838     /* This top level formula with RAND_TABLE_BIT has been already pushed */
21839     return (COND*) 0;
21840   }
21841 
21842   /*
21843     Because the following test takes a while and it can be done
21844     table_count times, we mark each item that we have examined with the result
21845     of the test
21846   */
21847   if ((cond->marker == 3 && !retain_ref_cond) ||
21848       (cond->used_tables() & ~tables))
21849     return (COND*) 0;				// Can't check this yet
21850 
21851   if (cond->marker == 2 || cond->eq_cmp_result() == Item::COND_OK)
21852   {
21853     cond->set_join_tab_idx(join_tab_idx_arg);
21854     return cond;				// Not boolean op
21855   }
21856 
21857   if (cond->type() == Item::FUNC_ITEM &&
21858       ((Item_func*) cond)->functype() == Item_func::EQ_FUNC)
21859   {
21860     Item *left_item=	((Item_func*) cond)->arguments()[0]->real_item();
21861     Item *right_item= ((Item_func*) cond)->arguments()[1]->real_item();
21862     if (left_item->type() == Item::FIELD_ITEM && !retain_ref_cond &&
21863 	test_if_ref(root_cond, (Item_field*) left_item,right_item))
21864     {
21865       cond->marker=3;			// Checked when read
21866       return (COND*) 0;
21867     }
21868     if (right_item->type() == Item::FIELD_ITEM && !retain_ref_cond &&
21869 	test_if_ref(root_cond, (Item_field*) right_item,left_item))
21870     {
21871       cond->marker=3;			// Checked when read
21872       return (COND*) 0;
21873     }
21874   }
21875   cond->marker=2;
21876   cond->set_join_tab_idx(join_tab_idx_arg);
21877   return cond;
21878 }
21879 
21880 
21881 /*
21882   The difference of this from make_cond_for_table() is that we're in the
21883   following state:
21884     1. conditions referring to 'tables' have been checked
21885     2. conditions referring to sjm_tables have been checked, too
21886     3. We need condition that couldn't be checked in #1 or #2 but
21887        can be checked when we get both (tables | sjm_tables).
21888 
21889 */
21890 static COND *
make_cond_after_sjm(THD * thd,Item * root_cond,Item * cond,table_map tables,table_map sjm_tables,bool inside_or_clause)21891 make_cond_after_sjm(THD *thd, Item *root_cond, Item *cond, table_map tables,
21892                     table_map sjm_tables, bool inside_or_clause)
21893 {
21894   /*
21895     We assume that conditions that refer to only join prefix tables or
21896     sjm_tables have already been checked.
21897   */
21898   if (!inside_or_clause)
21899   {
21900     table_map cond_used_tables= cond->used_tables();
21901     if((!(cond_used_tables & ~tables) ||
21902        !(cond_used_tables & ~sjm_tables)))
21903       return (COND*) 0;				// Already checked
21904   }
21905 
21906   /* AND/OR recursive descent */
21907   if (cond->type() == Item::COND_ITEM)
21908   {
21909     if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
21910     {
21911       /* Create new top level AND item */
21912       Item_cond_and *new_cond= new (thd->mem_root) Item_cond_and(thd);
21913       if (!new_cond)
21914 	return (COND*) 0;			// OOM /* purecov: inspected */
21915       List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
21916       Item *item;
21917       while ((item=li++))
21918       {
21919         Item *fix=make_cond_after_sjm(thd, root_cond, item, tables, sjm_tables,
21920                                       inside_or_clause);
21921 	if (fix)
21922 	  new_cond->argument_list()->push_back(fix, thd->mem_root);
21923       }
21924       switch (new_cond->argument_list()->elements) {
21925       case 0:
21926 	return (COND*) 0;			// Always true
21927       case 1:
21928 	return new_cond->argument_list()->head();
21929       default:
21930 	/*
21931 	  Item_cond_and do not need fix_fields for execution, its parameters
21932 	  are fixed or do not need fix_fields, too
21933 	*/
21934 	new_cond->quick_fix_field();
21935 	new_cond->used_tables_cache=
21936 	  ((Item_cond_and*) cond)->used_tables_cache &
21937 	  tables;
21938 	return new_cond;
21939       }
21940     }
21941     else
21942     {						// Or list
21943       Item_cond_or *new_cond= new (thd->mem_root) Item_cond_or(thd);
21944       if (!new_cond)
21945 	return (COND*) 0;			// OOM /* purecov: inspected */
21946       List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
21947       Item *item;
21948       while ((item=li++))
21949       {
21950         Item *fix= make_cond_after_sjm(thd, root_cond, item, tables, sjm_tables,
21951                                        /*inside_or_clause= */TRUE);
21952 	if (!fix)
21953 	  return (COND*) 0;			// Always true
21954 	new_cond->argument_list()->push_back(fix, thd->mem_root);
21955       }
21956       /*
21957 	Item_cond_or do not need fix_fields for execution, its parameters
21958 	are fixed or do not need fix_fields, too
21959       */
21960       new_cond->quick_fix_field();
21961       new_cond->used_tables_cache= ((Item_cond_or*) cond)->used_tables_cache;
21962       new_cond->top_level_item();
21963       return new_cond;
21964     }
21965   }
21966 
21967   /*
21968     Because the following test takes a while and it can be done
21969     table_count times, we mark each item that we have examined with the result
21970     of the test
21971   */
21972 
21973   if (cond->marker == 3 || (cond->used_tables() & ~(tables | sjm_tables)))
21974     return (COND*) 0;				// Can't check this yet
21975   if (cond->marker == 2 || cond->eq_cmp_result() == Item::COND_OK)
21976     return cond;				// Not boolean op
21977 
21978   /*
21979     Remove equalities that are guaranteed to be true by use of 'ref' access
21980     method
21981   */
21982   if (((Item_func*) cond)->functype() == Item_func::EQ_FUNC)
21983   {
21984     Item *left_item= ((Item_func*) cond)->arguments()[0]->real_item();
21985     Item *right_item= ((Item_func*) cond)->arguments()[1]->real_item();
21986     if (left_item->type() == Item::FIELD_ITEM &&
21987 	test_if_ref(root_cond, (Item_field*) left_item,right_item))
21988     {
21989       cond->marker=3;			// Checked when read
21990       return (COND*) 0;
21991     }
21992     if (right_item->type() == Item::FIELD_ITEM &&
21993 	test_if_ref(root_cond, (Item_field*) right_item,left_item))
21994     {
21995       cond->marker=3;			// Checked when read
21996       return (COND*) 0;
21997     }
21998   }
21999   cond->marker=2;
22000   return cond;
22001 }
22002 
22003 
22004 /*
22005   @brief
22006 
22007   Check if
22008    - @table uses "ref"-like access
22009    - it is based on "@field=certain_item" equality
22010    - the equality will be true for any record returned by the access method
22011   and return the certain_item if yes.
22012 
22013   @detail
22014 
22015   Equality won't necessarily hold if:
22016    - the used index covers only part of the @field.
22017      Suppose, we have a CHAR(5) field and INDEX(field(3)). if you make a lookup
22018      for 'abc', you will get both record with 'abc' and with 'abcde'.
22019    - The type of access is actually ref_or_null, and so @field can be either
22020      a value or NULL.
22021 
22022   @return
22023     Item that the field will be equal to
22024     NULL if no such item
22025 */
22026 
22027 static Item *
part_of_refkey(TABLE * table,Field * field)22028 part_of_refkey(TABLE *table,Field *field)
22029 {
22030   JOIN_TAB *join_tab= table->reginfo.join_tab;
22031   if (!join_tab)
22032     return (Item*) 0;             // field from outer non-select (UPDATE,...)
22033 
22034   uint ref_parts= join_tab->ref.key_parts;
22035   if (ref_parts) /* if it's ref/eq_ref/ref_or_null */
22036   {
22037     uint key= join_tab->ref.key;
22038     KEY *key_info= join_tab->get_keyinfo_by_key_no(key);
22039     KEY_PART_INFO *key_part= key_info->key_part;
22040 
22041     for (uint part=0 ; part < ref_parts ; part++,key_part++)
22042     {
22043       if (field->eq(key_part->field))
22044       {
22045         /*
22046           Found the field in the key. Check that
22047            1. ref_or_null doesn't alternate this component between a value and
22048               a NULL
22049            2. index fully covers the key
22050         */
22051         if (part != join_tab->ref.null_ref_part &&            // (1)
22052             !(key_part->key_part_flag & HA_PART_KEY_SEG))     // (2)
22053         {
22054           return join_tab->ref.items[part];
22055         }
22056         break;
22057       }
22058     }
22059   }
22060   return (Item*) 0;
22061 }
22062 
22063 
22064 /**
22065   Test if one can use the key to resolve ORDER BY.
22066 
22067   @param join                  if not NULL, can use the join's top-level
22068                                multiple-equalities.
22069   @param order                 Sort order
22070   @param table                 Table to sort
22071   @param idx                   Index to check
22072   @param used_key_parts [out]  NULL by default, otherwise return value for
22073                                used key parts.
22074 
22075 
22076   @note
22077     used_key_parts is set to correct key parts used if return value != 0
22078     (On other cases, used_key_part may be changed)
22079     Note that the value may actually be greater than the number of index
22080     key parts. This can happen for storage engines that have the primary
22081     key parts as a suffix for every secondary key.
22082 
22083   @retval
22084     1   key is ok.
22085   @retval
22086     0   Key can't be used
22087   @retval
22088     -1   Reverse key can be used
22089 */
22090 
test_if_order_by_key(JOIN * join,ORDER * order,TABLE * table,uint idx,uint * used_key_parts)22091 static int test_if_order_by_key(JOIN *join,
22092                                 ORDER *order, TABLE *table, uint idx,
22093 				uint *used_key_parts)
22094 {
22095   KEY_PART_INFO *key_part,*key_part_end;
22096   key_part=table->key_info[idx].key_part;
22097   key_part_end=key_part + table->key_info[idx].ext_key_parts;
22098   key_part_map const_key_parts=table->const_key_parts[idx];
22099   uint user_defined_kp= table->key_info[idx].user_defined_key_parts;
22100   int reverse=0;
22101   uint key_parts;
22102   bool have_pk_suffix= false;
22103   uint pk= table->s->primary_key;
22104   DBUG_ENTER("test_if_order_by_key");
22105 
22106   if ((table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX) &&
22107       table->key_info[idx].ext_key_part_map &&
22108       pk != MAX_KEY && pk != idx)
22109   {
22110     have_pk_suffix= true;
22111   }
22112 
22113   for (; order ; order=order->next, const_key_parts>>=1)
22114   {
22115     Item_field *item_field= ((Item_field*) (*order->item)->real_item());
22116     Field *field= item_field->field;
22117     int flag;
22118 
22119     /*
22120       Skip key parts that are constants in the WHERE clause.
22121       These are already skipped in the ORDER BY by const_expression_in_where()
22122     */
22123     for (; const_key_parts & 1 ; const_key_parts>>= 1)
22124       key_part++;
22125 
22126     /*
22127       This check was in this function historically (although I think it's
22128       better to check it outside of this function):
22129 
22130       "Test if the primary key parts were all const (i.e. there's one row).
22131        The sorting doesn't matter"
22132 
22133        So, we're checking that
22134        (1) this is an extended key
22135        (2) we've reached its end
22136     */
22137     key_parts= (uint)(key_part - table->key_info[idx].key_part);
22138     if (have_pk_suffix &&
22139         reverse == 0 && // all were =const so far
22140         key_parts == table->key_info[idx].ext_key_parts &&
22141         table->const_key_parts[pk] == PREV_BITS(uint,
22142                                                 table->key_info[pk].
22143                                                 user_defined_key_parts))
22144     {
22145       key_parts= 0;
22146       reverse= 1;                           // Key is ok to use
22147       goto ok;
22148     }
22149 
22150     if (key_part == key_part_end)
22151     {
22152       /*
22153         There are some items left in ORDER BY that we don't
22154       */
22155       DBUG_RETURN(0);
22156     }
22157 
22158     if (key_part->field != field)
22159     {
22160       /*
22161         Check if there is a multiple equality that allows to infer that field
22162         and key_part->field are equal
22163         (see also: compute_part_of_sort_key_for_equals)
22164       */
22165       if (item_field->item_equal &&
22166           item_field->item_equal->contains(key_part->field))
22167         field= key_part->field;
22168     }
22169     if (key_part->field != field || !field->part_of_sortkey.is_set(idx))
22170       DBUG_RETURN(0);
22171 
22172     const ORDER::enum_order keypart_order=
22173       (key_part->key_part_flag & HA_REVERSE_SORT) ?
22174       ORDER::ORDER_DESC : ORDER::ORDER_ASC;
22175     /* set flag to 1 if we can use read-next on key, else to -1 */
22176     flag= (order->direction == keypart_order) ? 1 : -1;
22177     if (reverse && flag != reverse)
22178       DBUG_RETURN(0);
22179     reverse=flag;				// Remember if reverse
22180     if (key_part < key_part_end)
22181       key_part++;
22182   }
22183 
22184   key_parts= (uint) (key_part - table->key_info[idx].key_part);
22185 
22186   if (reverse == -1 &&
22187       !(table->file->index_flags(idx, user_defined_kp-1, 1) & HA_READ_PREV))
22188     reverse= 0;                               // Index can't be used
22189 
22190   if (have_pk_suffix && reverse == -1)
22191   {
22192     uint pk_parts= table->key_info[pk].user_defined_key_parts;
22193     if (!(table->file->index_flags(pk, pk_parts, 1) & HA_READ_PREV))
22194       reverse= 0;                               // Index can't be used
22195   }
22196 
22197 ok:
22198   if (used_key_parts != NULL)
22199     *used_key_parts= key_parts;
22200   DBUG_RETURN(reverse);
22201 }
22202 
22203 
22204 /**
22205   Find shortest key suitable for full table scan.
22206 
22207   @param table                 Table to scan
22208   @param usable_keys           Allowed keys
22209 
22210   @return
22211     MAX_KEY     no suitable key found
22212     key index   otherwise
22213 */
22214 
find_shortest_key(TABLE * table,const key_map * usable_keys)22215 uint find_shortest_key(TABLE *table, const key_map *usable_keys)
22216 {
22217   double min_cost= DBL_MAX;
22218   uint best= MAX_KEY;
22219   if (!usable_keys->is_clear_all())
22220   {
22221     for (uint nr=0; nr < table->s->keys ; nr++)
22222     {
22223       if (usable_keys->is_set(nr))
22224       {
22225         double cost= table->file->keyread_time(nr, 1, table->file->records());
22226         if (cost < min_cost)
22227         {
22228           min_cost= cost;
22229           best=nr;
22230         }
22231         DBUG_ASSERT(best < MAX_KEY);
22232       }
22233     }
22234   }
22235   return best;
22236 }
22237 
22238 /**
22239   Test if a second key is the subkey of the first one.
22240 
22241   @param key_part              First key parts
22242   @param ref_key_part          Second key parts
22243   @param ref_key_part_end      Last+1 part of the second key
22244 
22245   @note
22246     Second key MUST be shorter than the first one.
22247 
22248   @retval
22249     1	is a subkey
22250   @retval
22251     0	no sub key
22252 */
22253 
22254 inline bool
is_subkey(KEY_PART_INFO * key_part,KEY_PART_INFO * ref_key_part,KEY_PART_INFO * ref_key_part_end)22255 is_subkey(KEY_PART_INFO *key_part, KEY_PART_INFO *ref_key_part,
22256 	  KEY_PART_INFO *ref_key_part_end)
22257 {
22258   for (; ref_key_part < ref_key_part_end; key_part++, ref_key_part++)
22259     if (!key_part->field->eq(ref_key_part->field))
22260       return 0;
22261   return 1;
22262 }
22263 
22264 /**
22265   Test if we can use one of the 'usable_keys' instead of 'ref' key
22266   for sorting.
22267 
22268   @param ref			Number of key, used for WHERE clause
22269   @param usable_keys		Keys for testing
22270 
22271   @return
22272     - MAX_KEY			If we can't use other key
22273     - the number of found key	Otherwise
22274 */
22275 
22276 static uint
test_if_subkey(ORDER * order,TABLE * table,uint ref,uint ref_key_parts,const key_map * usable_keys)22277 test_if_subkey(ORDER *order, TABLE *table, uint ref, uint ref_key_parts,
22278 	       const key_map *usable_keys)
22279 {
22280   uint nr;
22281   uint min_length= (uint) ~0;
22282   uint best= MAX_KEY;
22283   KEY_PART_INFO *ref_key_part= table->key_info[ref].key_part;
22284   KEY_PART_INFO *ref_key_part_end= ref_key_part + ref_key_parts;
22285 
22286   /*
22287     Find the shortest key that
22288     - produces the required ordering
22289     - has key #ref (up to ref_key_parts) as its subkey.
22290   */
22291   for (nr= 0 ; nr < table->s->keys ; nr++)
22292   {
22293     if (usable_keys->is_set(nr) &&
22294 	table->key_info[nr].key_length < min_length &&
22295 	table->key_info[nr].user_defined_key_parts >= ref_key_parts &&
22296 	is_subkey(table->key_info[nr].key_part, ref_key_part,
22297 		  ref_key_part_end) &&
22298 	test_if_order_by_key(NULL, order, table, nr))
22299     {
22300       min_length= table->key_info[nr].key_length;
22301       best= nr;
22302     }
22303   }
22304   return best;
22305 }
22306 
22307 
22308 /**
22309   Check if GROUP BY/DISTINCT can be optimized away because the set is
22310   already known to be distinct.
22311 
22312   Used in removing the GROUP BY/DISTINCT of the following types of
22313   statements:
22314   @code
22315     SELECT [DISTINCT] <unique_key_cols>... FROM <single_table_ref>
22316       [GROUP BY <unique_key_cols>,...]
22317   @endcode
22318 
22319     If (a,b,c is distinct)
22320     then <any combination of a,b,c>,{whatever} is also distinct
22321 
22322     This function checks if all the key parts of any of the unique keys
22323     of the table are referenced by a list : either the select list
22324     through find_field_in_item_list or GROUP BY list through
22325     find_field_in_order_list.
22326     If the above holds and the key parts cannot contain NULLs then we
22327     can safely remove the GROUP BY/DISTINCT,
22328     as no result set can be more distinct than an unique key.
22329 
22330   @param table                The table to operate on.
22331   @param find_func            function to iterate over the list and search
22332                               for a field
22333 
22334   @retval
22335     1                    found
22336   @retval
22337     0                    not found.
22338 */
22339 
22340 static bool
list_contains_unique_index(TABLE * table,bool (* find_func)(Field *,void *),void * data)22341 list_contains_unique_index(TABLE *table,
22342                           bool (*find_func) (Field *, void *), void *data)
22343 {
22344   for (uint keynr= 0; keynr < table->s->keys; keynr++)
22345   {
22346     if (keynr == table->s->primary_key ||
22347          (table->key_info[keynr].flags & HA_NOSAME))
22348     {
22349       KEY *keyinfo= table->key_info + keynr;
22350       KEY_PART_INFO *key_part, *key_part_end;
22351 
22352       for (key_part=keyinfo->key_part,
22353            key_part_end=key_part+ keyinfo->user_defined_key_parts;
22354            key_part < key_part_end;
22355            key_part++)
22356       {
22357         if (key_part->field->maybe_null() ||
22358             !find_func(key_part->field, data))
22359           break;
22360       }
22361       if (key_part == key_part_end)
22362         return 1;
22363     }
22364   }
22365   return 0;
22366 }
22367 
22368 
22369 /**
22370   Helper function for list_contains_unique_index.
22371   Find a field reference in a list of ORDER structures.
22372   Finds a direct reference of the Field in the list.
22373 
22374   @param field                The field to search for.
22375   @param data                 ORDER *.The list to search in
22376 
22377   @retval
22378     1                    found
22379   @retval
22380     0                    not found.
22381 */
22382 
22383 static bool
find_field_in_order_list(Field * field,void * data)22384 find_field_in_order_list (Field *field, void *data)
22385 {
22386   ORDER *group= (ORDER *) data;
22387   bool part_found= 0;
22388   for (ORDER *tmp_group= group; tmp_group; tmp_group=tmp_group->next)
22389   {
22390     Item *item= (*tmp_group->item)->real_item();
22391     if (item->type() == Item::FIELD_ITEM &&
22392         ((Item_field*) item)->field->eq(field))
22393     {
22394       part_found= 1;
22395       break;
22396     }
22397   }
22398   return part_found;
22399 }
22400 
22401 
22402 /**
22403   Helper function for list_contains_unique_index.
22404   Find a field reference in a dynamic list of Items.
22405   Finds a direct reference of the Field in the list.
22406 
22407   @param[in] field             The field to search for.
22408   @param[in] data              List<Item> *.The list to search in
22409 
22410   @retval
22411     1                    found
22412   @retval
22413     0                    not found.
22414 */
22415 
22416 static bool
find_field_in_item_list(Field * field,void * data)22417 find_field_in_item_list (Field *field, void *data)
22418 {
22419   List<Item> *fields= (List<Item> *) data;
22420   bool part_found= 0;
22421   List_iterator<Item> li(*fields);
22422   Item *item;
22423 
22424   while ((item= li++))
22425   {
22426     if (item->real_item()->type() == Item::FIELD_ITEM &&
22427 	((Item_field*) (item->real_item()))->field->eq(field))
22428     {
22429       part_found= 1;
22430       break;
22431     }
22432   }
22433   return part_found;
22434 }
22435 
22436 
22437 /*
22438   Fill *col_keys with a union of Field::part_of_sortkey of all fields
22439   that belong to 'table' and are equal to 'item_field'.
22440 */
22441 
compute_part_of_sort_key_for_equals(JOIN * join,TABLE * table,Item_field * item_field,key_map * col_keys)22442 void compute_part_of_sort_key_for_equals(JOIN *join, TABLE *table,
22443                                          Item_field *item_field,
22444                                          key_map *col_keys)
22445 {
22446   col_keys->clear_all();
22447   col_keys->merge(item_field->field->part_of_sortkey);
22448 
22449   if (!optimizer_flag(join->thd, OPTIMIZER_SWITCH_ORDERBY_EQ_PROP))
22450     return;
22451 
22452   Item_equal *item_eq= NULL;
22453 
22454   if (item_field->item_equal)
22455   {
22456     /*
22457       The item_field is from ORDER structure, but it already has an item_equal
22458       pointer set (UseMultipleEqualitiesToRemoveTempTable code have set it)
22459     */
22460     item_eq= item_field->item_equal;
22461   }
22462   else
22463   {
22464     /*
22465       Walk through join's muliple equalities and find the one that contains
22466       item_field.
22467     */
22468     if (!join->cond_equal)
22469       return;
22470     table_map needed_tbl_map= item_field->used_tables() | table->map;
22471     List_iterator<Item_equal> li(join->cond_equal->current_level);
22472     Item_equal *cur_item_eq;
22473     while ((cur_item_eq= li++))
22474     {
22475       if ((cur_item_eq->used_tables() & needed_tbl_map) &&
22476           cur_item_eq->contains(item_field->field))
22477       {
22478         item_eq= cur_item_eq;
22479         item_field->item_equal= item_eq; // Save the pointer to our Item_equal.
22480         break;
22481       }
22482     }
22483   }
22484 
22485   if (item_eq)
22486   {
22487     Item_equal_fields_iterator it(*item_eq);
22488     Item *item;
22489     /* Loop through other members that belong to table table */
22490     while ((item= it++))
22491     {
22492       if (item->type() == Item::FIELD_ITEM &&
22493           ((Item_field*)item)->field->table == table)
22494       {
22495         col_keys->merge(((Item_field*)item)->field->part_of_sortkey);
22496       }
22497     }
22498   }
22499 }
22500 
22501 
/**
  Test if we can skip the ORDER BY by using an index.

  If we can use an index, the JOIN_TAB / tab->select struct
  is changed to use the index.

  The index must cover all fields in <order>, or it will not be considered.

  Outline of the function:
  - Reduce *map to the keys that can serve every element of <order>
    (members of multiple equalities are taken into account).
  - Find out which key the current access method uses (ref access or a
    quick select). If that key cannot deliver the ordering, try to replace
    it by a key found with test_if_subkey().
  - If the (possibly replaced) key does not give the ordering, let
    test_if_cheaper_ordering() pick the best key.
  - For descending order, give up on quick select types that cannot be
    reversed.
  - Unless no_changes is set, switch the access method of 'tab' to ordered
    reading of the chosen key.
  - On exit exactly one of the original quick select (save_quick) and a
    newly built one is kept; the other one is deleted.

  @param tab           Table to check. Asserted to be the first non-const
                       table of its join.
  @param order         ORDER BY/GROUP BY list. If an element does not resolve
                       to a field item the function returns 0 at once.
  @param select_limit  Row limit; passed to test_if_cheaper_ordering(), which
                       may replace it. Used as tab->limit for EXPLAIN.
  @param no_changes    If set, the section that updates the query plan at
                       the end of the function is skipped.
  @param map           Keys that may be considered at all.

  @note
    NOTE(review): when the current key is replaced by a key found with
    test_if_subkey(), 'tab' (type, ref, use_quick, select->quick) is
    modified before no_changes is consulted - confirm that callers passing
    no_changes=true can live with that.

  @todo
    - sergeyp: Results of all index merge selects actually are ordered
    by clustered PK values.

  @retval
    0    We have to use filesort to do the sorting
  @retval
    1    We can use an index.
*/

static bool
test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
			bool no_changes, const key_map *map)
{
  int ref_key;
  uint UNINIT_VAR(ref_key_parts);
  int order_direction= 0;
  uint used_key_parts= 0;
  TABLE *table=tab->table;
  SQL_SELECT *select=tab->select;
  key_map usable_keys;
  /* Quick select on entry; restored in 'use_filesort', see also cleanup */
  QUICK_SELECT_I *save_quick= select ? select->quick : 0;
  Item *orig_cond= 0;
  bool orig_cond_saved= false;
  int best_key= -1;
  bool changed_key= false;
  DBUG_ENTER("test_if_skip_sort_order");

  /* Check that we are always called with first non-const table */
  DBUG_ASSERT(tab == tab->join->join_tab + tab->join->const_tables);

  /*
    Keys disabled by ALTER TABLE ... DISABLE KEYS should have already
    been taken into account.
  */
  usable_keys= *map;

  /* Find indexes that cover all ORDER/GROUP BY fields */
  for (ORDER *tmp_order=order; tmp_order ; tmp_order=tmp_order->next)
  {
    Item *item= (*tmp_order->item)->real_item();
    if (item->type() != Item::FIELD_ITEM)
    {
      /* Ordering by something that is not a plain field: no index can help */
      usable_keys.clear_all();
      DBUG_RETURN(0);
    }

    /*
      Take multiple-equalities into account. Suppose we have
        ORDER BY col1, col10
      and there are
         multiple-equal(col1, col2, col3),
         multiple-equal(col10, col11).

      Then,
      - when item=col1, we find the set of indexes that cover one of {col1,
        col2, col3}
      - when item=col10, we find the set of indexes that cover one of {col10,
        col11}

      And we compute an intersection of these sets to find set of indexes that
      cover all ORDER BY components.
    */
    key_map col_keys;
    compute_part_of_sort_key_for_equals(tab->join, table, (Item_field*)item,
                                        &col_keys);
    usable_keys.intersect(col_keys);
    if (usable_keys.is_clear_all())
      goto use_filesort;                        // No usable keys
  }

  /*
    Determine the key used by the current access method:
      -1       no key is used
      MAX_KEY  a quick select that combines several keys is used
      other    number of the key; ref_key_parts is then valid as well
  */
  ref_key= -1;
  /* Test if constant range in WHERE */
  if (tab->ref.key >= 0 && tab->ref.key_parts)
  {
    ref_key=	   tab->ref.key;
    ref_key_parts= tab->ref.key_parts;
    /*
      todo: why does JT_REF_OR_NULL mean filesort? We could find another index
      that satisfies the ordering. I would just set ref_key=MAX_KEY here...
    */
    if (tab->type == JT_REF_OR_NULL || tab->type == JT_FT ||
        tab->ref.uses_splitting)
      goto use_filesort;
  }
  else if (select && select->quick)		// Range found by opt_range
  {
    int quick_type= select->quick->get_type();
    /*
      assume results are not ordered when index merge is used
      TODO: sergeyp: Results of all index merge selects actually are ordered
      by clustered PK values.
    */

    if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE ||
        quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT ||
        quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
        quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT)
    {
      /*
        we set ref_key=MAX_KEY instead of -1, because
        test_if_cheaper_ordering() assumes that "ref_key==-1" means doing
        full index scan.
        (This is not very straightforward and we got into this situation for
         historical reasons. Should be fixed at some point).
      */
      ref_key= MAX_KEY;
    }
    else
    {
      ref_key= select->quick->index;
      ref_key_parts= select->quick->used_key_parts;
    }
  }

  if (ref_key >= 0 && ref_key != MAX_KEY)
  {
    /* Current access method uses index ref_key with ref_key_parts parts */
    if (!usable_keys.is_set(ref_key))
    {
      /* However, ref_key doesn't match the needed ordering */
      uint new_ref_key;

      /*
	If using index only read, only consider other possible index only
	keys
      */
      if (table->covering_keys.is_set(ref_key))
	usable_keys.intersect(table->covering_keys);
      /*
        Put the condition from before index condition pushdown back on the
        table. orig_cond remembers the value returned by set_cond() so that
        the exit paths can hand it back to set_cond().
      */
      if (tab->pre_idx_push_select_cond)
      {
        orig_cond= tab->set_cond(tab->pre_idx_push_select_cond);
        orig_cond_saved= true;
      }

      if ((new_ref_key= test_if_subkey(order, table, ref_key, ref_key_parts,
				       &usable_keys)) < MAX_KEY)
      {
        /*
          Index new_ref_key
          - produces the required ordering,
          - also has the same columns as ref_key for #ref_key_parts (this
            means we will read the same number of rows as with ref_key).
        */

        /*
          If new_ref_key allows to construct a quick select which uses more key
          parts than ref(new_ref_key) would, do that.

          Otherwise, construct a ref access (todo: it's not clear what is the
          win in using ref access when we could use quick select also?)
        */
        if ((table->quick_keys.is_set(new_ref_key) &&
             table->quick_key_parts[new_ref_key] > ref_key_parts) ||
             !(tab->ref.key >= 0))
	{
          /*
            The range optimizer constructed QUICK_RANGE for ref_key, and
            we want to use instead new_ref_key as the index. We can't
            just change the index of the quick select, because this may
            result in an inconsistent QUICK_SELECT object. Below we
            create a new QUICK_SELECT from scratch so that all its
            parameters are set correctly by the range optimizer.
           */
          key_map new_ref_key_map;
          COND *save_cond;
          bool res;
          new_ref_key_map.clear_all();  // Force the creation of quick select
          new_ref_key_map.set_bit(new_ref_key); // only for new_ref_key.

          /* Reset quick;  This will be restored in 'use_filesort' if needed */
          select->quick= 0;
          save_cond= select->cond;
          if (select->pre_idx_push_select_cond)
            select->cond= select->pre_idx_push_select_cond;
          /* A result <= 0 is treated as "no quick select could be built" */
          res= select->test_quick_select(tab->join->thd, new_ref_key_map, 0,
                                         (tab->join->select_options &
                                          OPTION_FOUND_ROWS) ?
                                         HA_POS_ERROR :
                                         tab->join->unit->select_limit_cnt,TRUE,
                                         TRUE, FALSE) <= 0;
          if (res)
          {
            select->cond= save_cond;
            goto use_filesort;
          }
          DBUG_ASSERT(tab->select->quick);
          tab->type= JT_ALL;
          tab->ref.key= -1;
          tab->ref.key_parts= 0;
          tab->use_quick= 1;
          best_key= new_ref_key;
          /*
            We don't restore select->cond as we want to use the
            original condition as index condition pushdown is not
            active for the new index.
            todo: why not perform index condition pushdown for the new index?
          */
	}
        else
	{
          /*
            We'll use ref access method on key new_ref_key. In general case
            the index search tuple for new_ref_key will be different (e.g.
            when one index is defined as (part1, part2, ...) and another as
            (part1, part2(N), ...) and the WHERE clause contains
            "part1 = const1 AND part2=const2".
            So we build tab->ref from scratch here.
          */
          KEYUSE *keyuse= tab->keyuse;
          /* Advance to the first KEYUSE element that is for new_ref_key */
          while (keyuse->key != new_ref_key && keyuse->table == tab->table)
            keyuse++;
          if (create_ref_for_key(tab->join, tab, keyuse, FALSE,
                                 (tab->join->const_table_map |
                                  OUTER_REF_TABLE_BIT)))
            goto use_filesort;

          pick_table_access_method(tab);
	}

        ref_key= new_ref_key;
        changed_key= true;
     }
    }
    /* Check if we get the rows in requested sorted order by using the key */
    if (usable_keys.is_set(ref_key) &&
        (order_direction= test_if_order_by_key(tab->join, order,table,ref_key,
					       &used_key_parts)))
      goto check_reverse_order;
  }
  {
    /*
      The key of the current access method (if any) does not give the
      required ordering. Let test_if_cheaper_ordering() choose a key.
    */
    uint UNINIT_VAR(best_key_parts);
    uint saved_best_key_parts= 0;
    int best_key_direction= 0;
    JOIN *join= tab->join;
    ha_rows table_records= table->stat_records();

    test_if_cheaper_ordering(tab, order, table, usable_keys,
                             ref_key, select_limit,
                             &best_key, &best_key_direction,
                             &select_limit, &best_key_parts,
                             &saved_best_key_parts);

    /*
      filesort() and join cache are usually faster than reading in
      index order and not using join cache, except in case that chosen
      index is clustered key.
    */
    if (best_key < 0 ||
        ((select_limit >= table_records) &&
         (tab->type == JT_ALL &&
         tab->join->table_count > tab->join->const_tables + 1) &&
         !(table->file->index_flags(best_key, 0, 1) & HA_CLUSTERED_INDEX)))
      goto use_filesort;

    if (select && // psergey:  why doesn't this use a quick?
        table->quick_keys.is_set(best_key) && best_key != ref_key)
    {
      key_map tmp_map;
      tmp_map.clear_all();       // Force the creation of quick select
      tmp_map.set_bit(best_key); // only best_key.
      select->quick= 0;

      bool cond_saved= false;
      Item *saved_cond;

      /*
        Index Condition Pushdown may have removed parts of the condition for
        this table. Temporarily put them back because we want the whole
        condition for the range analysis.
      */
      if (select->pre_idx_push_select_cond)
      {
        saved_cond= select->cond;
        select->cond= select->pre_idx_push_select_cond;
        cond_saved= true;
      }

      /*
        The return value is not checked here; the code below tests
        select->quick to see whether a quick select exists.
      */
      select->test_quick_select(join->thd, tmp_map, 0,
                                join->select_options & OPTION_FOUND_ROWS ?
                                HA_POS_ERROR :
                                join->unit->select_limit_cnt,
                                TRUE, FALSE, FALSE);

      if (cond_saved)
        select->cond= saved_cond;
    }
    order_direction= best_key_direction;
    /*
      saved_best_key_parts is actual number of used keyparts found by the
      test_if_order_by_key function. It could differ from keyinfo->user_defined_key_parts,
      thus we have to restore it in case of desc order as it affects
      QUICK_SELECT_DESC behaviour.
    */
    used_key_parts= (order_direction == -1) ?
      saved_best_key_parts :  best_key_parts;
    changed_key= true;
  }

check_reverse_order:
  /* A key that gives the ordering has been found at this point */
  DBUG_ASSERT(order_direction != 0);

  if (order_direction == -1)		// If ORDER BY ... DESC
  {
    int quick_type;
    if (select && select->quick)
    {
      /*
	Don't reverse the sort order, if it's already done.
        (In some cases test_if_order_by_key() can be called multiple times)
      */
      if (select->quick->reverse_sorted())
        goto skipped_filesort;

      /* Quick selects of these types are not read in reverse order */
      quick_type= select->quick->get_type();
      if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE ||
          quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT ||
          quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT ||
          quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
          quick_type == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)
      {
        tab->limit= 0;
        goto use_filesort;               // Use filesort
      }
    }
  }

  /*
    Update query plan with access pattern for doing ordered access
    according to what we have decided above.
  */
  if (!no_changes) // We are allowed to update QEP
  {
    if (best_key >= 0)
    {
      /* True if a quick select other than the original one is in place */
      bool quick_created=
        (select && select->quick && select->quick!=save_quick);

      /*
         If ref_key used index tree reading only ('Using index' in EXPLAIN),
         and best_key doesn't, then revert the decision.
         Key read is started when best_key is a covering key and ended
         otherwise.
      */
      if (table->covering_keys.is_set(best_key))
        table->file->ha_start_keyread(best_key);
      else
        table->file->ha_end_keyread();

      if (!quick_created)
      {
        /* No new quick select: scan the whole index best_key in order */
        if (select)                  // Throw any existing quick select
          select->quick= 0;          // Cleanup either reset to save_quick,
                                     // or 'delete save_quick'
        tab->index= best_key;
        tab->read_first_record= order_direction > 0 ?
                                join_read_first:join_read_last;
        tab->type=JT_NEXT;           // Read with index_first(), index_next()

        if (tab->pre_idx_push_select_cond)
        {
          tab->set_cond(tab->pre_idx_push_select_cond);
          /*
            orig_cond is a part of pre_idx_push_cond,
            no need to restore it.
          */
          orig_cond= 0;
          orig_cond_saved= false;
        }

        table->file->ha_index_or_rnd_end();
        if (tab->join->select_options & SELECT_DESCRIBE)
        {
          /* EXPLAIN only: the ref is dropped and the row limit is recorded */
          tab->ref.key= -1;
          tab->ref.key_parts= 0;
          if (select_limit < table->stat_records())
            tab->limit= select_limit;
          table->file->ha_end_keyread();
        }
      }
      else if (tab->type != JT_ALL || tab->select->quick)
      {
        /*
          We're about to use a quick access to the table.
          We need to change the access method so as the quick access
          method is actually used.
        */
        DBUG_ASSERT(tab->select->quick);
        tab->type=JT_ALL;
        tab->use_quick=1;
        tab->ref.key= -1;
        tab->ref.key_parts=0;		// Don't use ref key.
        tab->read_first_record= join_init_read_record;
        if (tab->is_using_loose_index_scan())
          tab->join->tmp_table_param.precomputed_group_by= TRUE;

        /*
          Restore the original condition as changes done by pushdown
          condition are not relevant anymore
        */
        if (tab->select && tab->select->pre_idx_push_select_cond)
	{
          tab->set_cond(tab->select->pre_idx_push_select_cond);
           tab->table->file->cancel_pushed_idx_cond();
        }
        /*
          TODO: update the number of records in join->best_positions[tablenr]
        */
      }
    } // best_key >= 0

    if (order_direction == -1)		// If ORDER BY ... DESC
    {
      if (select && select->quick)
      {
        /* ORDER BY range_key DESC */
        QUICK_SELECT_I *tmp= select->quick->make_reverse(used_key_parts);
        if (!tmp)
        {
          tab->limit= 0;
          goto use_filesort;           // Reverse sort failed -> filesort
        }
        /*
          Cancel Pushed Index Condition, as it doesn't work for reverse scans.
        */
        if (tab->select && tab->select->pre_idx_push_select_cond)
	{
          tab->set_cond(tab->select->pre_idx_push_select_cond);
           tab->table->file->cancel_pushed_idx_cond();
        }
        /*
          Forget save_quick if it was the quick select that got reversed,
          so that the cleanup code below leaves it alone.
        */
        if (select->quick == save_quick)
          save_quick= 0;                // make_reverse() consumed it
        select->set_quick(tmp);
        /* Cancel "Range checked for each record" */
        if (tab->use_quick == 2)
        {
          tab->use_quick= 1;
          tab->read_first_record= join_init_read_record;
        }
      }
      else if (tab->type != JT_NEXT && tab->type != JT_REF_OR_NULL &&
               tab->ref.key >= 0 && tab->ref.key_parts <= used_key_parts)
      {
        /*
          SELECT * FROM t1 WHERE a=1 ORDER BY a DESC,b DESC

          Use a traversal function that starts by reading the last row
          with key part (A) and then traverse the index backwards.
        */
        tab->read_first_record= join_read_last_key;
        tab->read_record.read_record_func= join_read_prev_same;
        /* Cancel "Range checked for each record" */
        if (tab->use_quick == 2)
        {
          tab->use_quick= 1;
          tab->read_first_record= join_init_read_record;
        }
        /*
          Cancel Pushed Index Condition, as it doesn't work for reverse scans.
        */
        if (tab->select && tab->select->pre_idx_push_select_cond)
	{
          tab->set_cond(tab->select->pre_idx_push_select_cond);
           tab->table->file->cancel_pushed_idx_cond();
        }
      }
    }
    else if (select && select->quick)
    {
      /* Cancel "Range checked for each record" */
      if (tab->use_quick == 2)
      {
        tab->use_quick= 1;
        tab->read_first_record= join_init_read_record;
      }
      select->quick->need_sorted_output();
    }

    tab->read_record.unlock_row= (tab->type == JT_EQ_REF) ?
                                 join_read_key_unlock_row : rr_unlock_row;

  } // QEP has been modified

  /*
    Cleanup:
    We may have both a 'select->quick' and 'save_quick' (original)
    at this point. Delete the one that we won't use.
  */

skipped_filesort:
  // Keep current (ordered) select->quick
  if (select && save_quick != select->quick)
  {
    delete save_quick;
    save_quick= NULL;
  }
  /* The key was not changed: put the condition saved in orig_cond back */
  if (orig_cond_saved && !changed_key)
    tab->set_cond(orig_cond);
  /* The pushed index condition was made for the previous key: cancel it */
  if (!no_changes && changed_key && table->file->pushed_idx_cond)
    table->file->cancel_pushed_idx_cond();

  DBUG_RETURN(1);

use_filesort:
  // Restore original save_quick
  if (select && select->quick != save_quick)
  {
    delete select->quick;
    select->quick= save_quick;
  }
  if (orig_cond_saved)
    tab->set_cond(orig_cond);

  DBUG_RETURN(0);
}
23024 
23025 
23026 /*
23027   If not selecting by given key, create an index how records should be read
23028 
23029   SYNOPSIS
23030    create_sort_index()
23031      thd		Thread handler
23032      join		Join with table to sort
23033      join_tab		What table to sort
23034      fsort              Filesort object.  NULL means "use tab->filesort".
23035 
23036   IMPLEMENTATION
23037    - If there is an index that can be used, the first non-const join_tab in
23038      'join' is modified to use this index.
23039    - If no index, create with filesort() an index file that can be used to
23040      retrieve rows in order (should be done with 'read_record').
23041      The sorted data is stored in tab->filesort
23042 
23043   RETURN VALUES
23044     0		ok
23045     -1		Some fatal error
23046     1		No records
23047 */
23048 
23049 int
create_sort_index(THD * thd,JOIN * join,JOIN_TAB * tab,Filesort * fsort)23050 create_sort_index(THD *thd, JOIN *join, JOIN_TAB *tab, Filesort *fsort)
23051 {
23052   TABLE *table;
23053   SQL_SELECT *select;
23054   bool quick_created= FALSE;
23055   SORT_INFO *file_sort= 0;
23056   DBUG_ENTER("create_sort_index");
23057 
23058   if (fsort == NULL)
23059     fsort= tab->filesort;
23060 
23061   table=  tab->table;
23062   select= fsort->select;
23063 
23064   table->status=0;				// May be wrong if quick_select
23065 
23066   if (!tab->preread_init_done && tab->preread_init())
23067     goto err;
23068 
23069   // If table has a range, move it to select
23070   if (select && tab->ref.key >= 0)
23071   {
23072     if (!select->quick)
23073     {
23074       if (tab->quick)
23075       {
23076         select->quick= tab->quick;
23077         tab->quick= NULL;
23078       /*
23079         We can only use 'Only index' if quick key is same as ref_key
23080         and in index_merge 'Only index' cannot be used
23081       */
23082       if (((uint) tab->ref.key != select->quick->index))
23083         table->file->ha_end_keyread();
23084       }
23085       else
23086       {
23087         /*
23088 	  We have a ref on a const;  Change this to a range that filesort
23089 	  can use.
23090 	  For impossible ranges (like when doing a lookup on NULL on a NOT NULL
23091 	  field, quick will contain an empty record set.
23092         */
23093         if (!(select->quick= (tab->type == JT_FT ?
23094 			      get_ft_select(thd, table, tab->ref.key) :
23095 			      get_quick_select_for_ref(thd, table, &tab->ref,
23096                                                        tab->found_records))))
23097 	  goto err;
23098         quick_created= TRUE;
23099       }
23100       fsort->own_select= true;
23101     }
23102     else
23103     {
23104       DBUG_ASSERT(tab->type == JT_REF || tab->type == JT_EQ_REF);
23105       // Update ref value
23106       if (unlikely(cp_buffer_from_ref(thd, table, &tab->ref) &&
23107                    thd->is_fatal_error))
23108         goto err;                                   // out of memory
23109     }
23110   }
23111 
23112 
23113   /* Fill schema tables with data before filesort if it's necessary */
23114   if ((join->select_lex->options & OPTION_SCHEMA_TABLE) &&
23115       unlikely(get_schema_tables_result(join, PROCESSED_BY_CREATE_SORT_INDEX)))
23116     goto err;
23117 
23118   if (table->s->tmp_table)
23119     table->file->info(HA_STATUS_VARIABLE);	// Get record count
23120   file_sort= filesort(thd, table, fsort, fsort->tracker, join, tab->table->map);
23121   DBUG_ASSERT(tab->filesort_result == 0);
23122   tab->filesort_result= file_sort;
23123   tab->records= 0;
23124   if (file_sort)
23125   {
23126     tab->records= join->select_options & OPTION_FOUND_ROWS ?
23127       file_sort->found_rows : file_sort->return_rows;
23128     tab->join->join_examined_rows+= file_sort->examined_rows;
23129   }
23130 
23131   if (quick_created)
23132   {
23133     /* This will delete the quick select. */
23134     select->cleanup();
23135   }
23136 
23137   table->file->ha_end_keyread();
23138   if (tab->type == JT_FT)
23139     table->file->ha_ft_end();
23140   else
23141     table->file->ha_index_or_rnd_end();
23142 
23143   DBUG_RETURN(file_sort == 0);
23144 err:
23145   DBUG_RETURN(-1);
23146 }
23147 
23148 
23149 /**
23150   Compare fields from table->record[0] and table->record[1],
23151   possibly skipping few first fields.
23152 
23153   @param table
23154   @param ptr                    field to start the comparison from,
23155                                 somewhere in the table->field[] array
23156 
23157   @retval 1     different
23158   @retval 0     identical
23159 */
compare_record(TABLE * table,Field ** ptr)23160 static bool compare_record(TABLE *table, Field **ptr)
23161 {
23162   for (; *ptr ; ptr++)
23163   {
23164     Field *f= *ptr;
23165     if (f->is_null() != f->is_null(table->s->rec_buff_length) ||
23166         (!f->is_null() && f->cmp_offset(table->s->rec_buff_length)))
23167       return 1;
23168   }
23169   return 0;
23170 }
23171 
copy_blobs(Field ** ptr)23172 static bool copy_blobs(Field **ptr)
23173 {
23174   for (; *ptr ; ptr++)
23175   {
23176     if ((*ptr)->flags & BLOB_FLAG)
23177       if (((Field_blob *) (*ptr))->copy())
23178 	return 1;				// Error
23179   }
23180   return 0;
23181 }
23182 
free_blobs(Field ** ptr)23183 static void free_blobs(Field **ptr)
23184 {
23185   for (; *ptr ; ptr++)
23186   {
23187     if ((*ptr)->flags & BLOB_FLAG)
23188       ((Field_blob *) (*ptr))->free();
23189   }
23190 }
23191 
23192 
23193 /*
23194   @brief
23195     Remove duplicates from a temporary table.
23196 
23197   @detail
23198     Remove duplicate rows from a temporary table. This is used for e.g. queries
23199     like
23200 
23201       select distinct count(*) as CNT from tbl group by col
23202 
23203     Here, we get a group table with count(*) values. It is not possible to
23204     prevent duplicates from appearing in the table (as we don't know the values
23205     before we've done the grouping).  Because of that, we have this function to
23206     scan the temptable (maybe, multiple times) and remove the duplicate rows
23207 
23208     Rows that do not satisfy 'having' condition are also removed.
23209 */
23210 
23211 bool
remove_duplicates()23212 JOIN_TAB::remove_duplicates()
23213 
23214 {
23215   bool error;
23216   ulong keylength= 0;
23217   uint field_count;
23218   List<Item> *fields= (this-1)->fields;
23219   THD *thd= join->thd;
23220 
23221   DBUG_ENTER("remove_duplicates");
23222 
23223   DBUG_ASSERT(join->aggr_tables > 0 && table->s->tmp_table != NO_TMP_TABLE);
23224   THD_STAGE_INFO(join->thd, stage_removing_duplicates);
23225 
23226   //join->explain->ops_tracker.report_duplicate_removal();
23227 
23228   table->reginfo.lock_type=TL_WRITE;
23229 
23230   /* Calculate how many saved fields there is in list */
23231   field_count=0;
23232   List_iterator<Item> it(*fields);
23233   Item *item;
23234   while ((item=it++))
23235   {
23236     if (item->get_tmp_table_field() && ! item->const_item())
23237       field_count++;
23238   }
23239 
23240   if (!field_count && !(join->select_options & OPTION_FOUND_ROWS) && !having)
23241   {                    // only const items with no OPTION_FOUND_ROWS
23242     join->unit->select_limit_cnt= 1;		// Only send first row
23243     DBUG_RETURN(false);
23244   }
23245 
23246   Field **first_field=table->field+table->s->fields - field_count;
23247   for (Field **ptr=first_field; *ptr; ptr++)
23248     keylength+= (*ptr)->sort_length() + (*ptr)->maybe_null();
23249 
23250   /*
23251     Disable LIMIT ROWS EXAMINED in order to avoid interrupting prematurely
23252     duplicate removal, and produce a possibly incomplete query result.
23253   */
23254   thd->lex->limit_rows_examined_cnt= ULONGLONG_MAX;
23255   if (thd->killed == ABORT_QUERY)
23256     thd->reset_killed();
23257 
23258   table->file->info(HA_STATUS_VARIABLE);
23259   if (table->s->db_type() == heap_hton ||
23260       (!table->s->blob_fields &&
23261        ((ALIGN_SIZE(keylength) + HASH_OVERHEAD) * table->file->stats.records <
23262 	thd->variables.sortbuff_size)))
23263     error=remove_dup_with_hash_index(join->thd, table, field_count, first_field,
23264 				     keylength, having);
23265   else
23266     error=remove_dup_with_compare(join->thd, table, first_field, having);
23267 
23268   if (join->select_lex != join->select_lex->master_unit()->fake_select_lex)
23269     thd->lex->set_limit_rows_examined();
23270   free_blobs(first_field);
23271   DBUG_RETURN(error);
23272 }
23273 
23274 
remove_dup_with_compare(THD * thd,TABLE * table,Field ** first_field,Item * having)23275 static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field,
23276 				   Item *having)
23277 {
23278   handler *file=table->file;
23279   uchar *record=table->record[0];
23280   int error;
23281   DBUG_ENTER("remove_dup_with_compare");
23282 
23283   if (unlikely(file->ha_rnd_init_with_error(1)))
23284     DBUG_RETURN(1);
23285 
23286   error= file->ha_rnd_next(record);
23287   for (;;)
23288   {
23289     if (unlikely(thd->check_killed()))
23290     {
23291       error=0;
23292       goto err;
23293     }
23294     if (unlikely(error))
23295     {
23296       if (error == HA_ERR_END_OF_FILE)
23297 	break;
23298       goto err;
23299     }
23300     if (having && !having->val_int())
23301     {
23302       if (unlikely((error= file->ha_delete_row(record))))
23303 	goto err;
23304       error= file->ha_rnd_next(record);
23305       continue;
23306     }
23307     if (unlikely(copy_blobs(first_field)))
23308     {
23309       my_message(ER_OUTOFMEMORY, ER_THD(thd,ER_OUTOFMEMORY),
23310                  MYF(ME_FATALERROR));
23311       error=0;
23312       goto err;
23313     }
23314     store_record(table,record[1]);
23315 
23316     /* Read through rest of file and mark duplicated rows deleted */
23317     bool found=0;
23318     for (;;)
23319     {
23320       if (unlikely((error= file->ha_rnd_next(record))))
23321       {
23322 	if (error == HA_ERR_END_OF_FILE)
23323 	  break;
23324 	goto err;
23325       }
23326       if (compare_record(table, first_field) == 0)
23327       {
23328 	if (unlikely((error= file->ha_delete_row(record))))
23329 	  goto err;
23330       }
23331       else if (!found)
23332       {
23333 	found=1;
23334         if (unlikely((error= file->remember_rnd_pos())))
23335           goto err;
23336       }
23337     }
23338     if (!found)
23339       break;					// End of file
23340     /* Restart search on saved row */
23341     if (unlikely((error= file->restart_rnd_next(record))))
23342       goto err;
23343   }
23344 
23345   file->extra(HA_EXTRA_NO_CACHE);
23346   (void) file->ha_rnd_end();
23347   DBUG_RETURN(0);
23348 err:
23349   file->extra(HA_EXTRA_NO_CACHE);
23350   (void) file->ha_rnd_end();
23351   if (error)
23352     file->print_error(error,MYF(0));
23353   DBUG_RETURN(1);
23354 }
23355 
23356 
23357 /**
23358   Generate a hash index for each row to quickly find duplicate rows.
23359 
23360   @note
23361     Note that this will not work on tables with blobs!
23362 */
23363 
remove_dup_with_hash_index(THD * thd,TABLE * table,uint field_count,Field ** first_field,ulong key_length,Item * having)23364 static int remove_dup_with_hash_index(THD *thd, TABLE *table,
23365 				      uint field_count,
23366 				      Field **first_field,
23367 				      ulong key_length,
23368 				      Item *having)
23369 {
23370   uchar *key_buffer, *key_pos, *record=table->record[0];
23371   int error;
23372   handler *file= table->file;
23373   ulong extra_length= ALIGN_SIZE(key_length)-key_length;
23374   uint *field_lengths, *field_length;
23375   HASH hash;
23376   Field **ptr;
23377   DBUG_ENTER("remove_dup_with_hash_index");
23378 
23379   if (unlikely(!my_multi_malloc(MYF(MY_WME),
23380                                 &key_buffer,
23381                                 (uint) ((key_length + extra_length) *
23382                                         (long) file->stats.records),
23383                                 &field_lengths,
23384                                 (uint) (field_count*sizeof(*field_lengths)),
23385                                 NullS)))
23386     DBUG_RETURN(1);
23387 
23388   for (ptr= first_field, field_length=field_lengths ; *ptr ; ptr++)
23389     (*field_length++)= (*ptr)->sort_length();
23390 
23391   if (unlikely(my_hash_init(&hash, &my_charset_bin,
23392                             (uint) file->stats.records, 0,
23393                             key_length, (my_hash_get_key) 0, 0, 0)))
23394   {
23395     my_free(key_buffer);
23396     DBUG_RETURN(1);
23397   }
23398 
23399   if (unlikely((error= file->ha_rnd_init(1))))
23400     goto err;
23401 
23402   key_pos=key_buffer;
23403   for (;;)
23404   {
23405     uchar *org_key_pos;
23406     if (unlikely(thd->check_killed()))
23407     {
23408       error=0;
23409       goto err;
23410     }
23411     if (unlikely((error= file->ha_rnd_next(record))))
23412     {
23413       if (error == HA_ERR_END_OF_FILE)
23414 	break;
23415       goto err;
23416     }
23417     if (having && !having->val_int())
23418     {
23419       if (unlikely((error= file->ha_delete_row(record))))
23420 	goto err;
23421       continue;
23422     }
23423 
23424     /* copy fields to key buffer */
23425     org_key_pos= key_pos;
23426     field_length=field_lengths;
23427     for (ptr= first_field ; *ptr ; ptr++)
23428     {
23429       (*ptr)->make_sort_key(key_pos, *field_length);
23430       key_pos+= (*ptr)->maybe_null() + *field_length++;
23431     }
23432     /* Check if it exists before */
23433     if (my_hash_search(&hash, org_key_pos, key_length))
23434     {
23435       /* Duplicated found ; Remove the row */
23436       if (unlikely((error= file->ha_delete_row(record))))
23437 	goto err;
23438     }
23439     else
23440     {
23441       if (my_hash_insert(&hash, org_key_pos))
23442         goto err;
23443     }
23444     key_pos+=extra_length;
23445   }
23446   my_free(key_buffer);
23447   my_hash_free(&hash);
23448   file->extra(HA_EXTRA_NO_CACHE);
23449   (void) file->ha_rnd_end();
23450   DBUG_RETURN(0);
23451 
23452 err:
23453   my_free(key_buffer);
23454   my_hash_free(&hash);
23455   file->extra(HA_EXTRA_NO_CACHE);
23456   (void) file->ha_rnd_end();
23457   if (unlikely(error))
23458     file->print_error(error,MYF(0));
23459   DBUG_RETURN(1);
23460 }
23461 
23462 
23463 /*
23464   eq_ref: Create the lookup key and check if it is the same as saved key
23465 
23466   SYNOPSIS
23467     cmp_buffer_with_ref()
23468       tab      Join tab of the accessed table
23469       table    The table to read.  This is usually tab->table, except for
23470                semi-join when we might need to make a lookup in a temptable
23471                instead.
23472       tab_ref  The structure with methods to collect index lookup tuple.
23473                This is usually table->ref, except for the case of when we're
23474                doing lookup into semi-join materialization table.
23475 
23476   DESCRIPTION
23477     Used by eq_ref access method: create the index lookup key and check if
23478     we've used this key at previous lookup (If yes, we don't need to repeat
23479     the lookup - the record has been already fetched)
23480 
23481   RETURN
23482     TRUE   No cached record for the key, or failed to create the key (due to
23483            out-of-domain error)
23484     FALSE  The created key is the same as the previous one (and the record
23485            is already in table->record)
23486 */
23487 
23488 static bool
cmp_buffer_with_ref(THD * thd,TABLE * table,TABLE_REF * tab_ref)23489 cmp_buffer_with_ref(THD *thd, TABLE *table, TABLE_REF *tab_ref)
23490 {
23491   bool no_prev_key;
23492   if (!tab_ref->disable_cache)
23493   {
23494     if (!(no_prev_key= tab_ref->key_err))
23495     {
23496       /* Previous access found a row. Copy its key */
23497       memcpy(tab_ref->key_buff2, tab_ref->key_buff, tab_ref->key_length);
23498     }
23499   }
23500   else
23501     no_prev_key= TRUE;
23502   if ((tab_ref->key_err= cp_buffer_from_ref(thd, table, tab_ref)) ||
23503       no_prev_key)
23504     return 1;
23505   return memcmp(tab_ref->key_buff2, tab_ref->key_buff, tab_ref->key_length)
23506     != 0;
23507 }
23508 
23509 
23510 bool
cp_buffer_from_ref(THD * thd,TABLE * table,TABLE_REF * ref)23511 cp_buffer_from_ref(THD *thd, TABLE *table, TABLE_REF *ref)
23512 {
23513   Check_level_instant_set check_level_save(thd, CHECK_FIELD_IGNORE);
23514   MY_BITMAP *old_map= dbug_tmp_use_all_columns(table, &table->write_set);
23515   bool result= 0;
23516 
23517   for (store_key **copy=ref->key_copy ; *copy ; copy++)
23518   {
23519     if ((*copy)->copy() & 1)
23520     {
23521       result= 1;
23522       break;
23523     }
23524   }
23525   dbug_tmp_restore_column_map(&table->write_set, old_map);
23526   return result;
23527 }
23528 
23529 
23530 /*****************************************************************************
23531   Group and order functions
23532 *****************************************************************************/
23533 
23534 /**
23535   Resolve an ORDER BY or GROUP BY column reference.
23536 
23537   Given a column reference (represented by 'order') from a GROUP BY or ORDER
23538   BY clause, find the actual column it represents. If the column being
23539   resolved is from the GROUP BY clause, the procedure searches the SELECT
23540   list 'fields' and the columns in the FROM list 'tables'. If 'order' is from
23541   the ORDER BY clause, only the SELECT list is being searched.
23542 
23543   If 'order' is resolved to an Item, then order->item is set to the found
23544   Item. If there is no item for the found column (that is, it was resolved
23545   into a table field), order->item is 'fixed' and is added to all_fields and
23546   ref_pointer_array.
23547 
23548   ref_pointer_array and all_fields are updated.
23549 
23550   @param[in] thd		    Pointer to current thread structure
23551   @param[in,out] ref_pointer_array  All select, group and order by fields
23552   @param[in] tables                 List of tables to search in (usually
23553     FROM clause)
23554   @param[in] order                  Column reference to be resolved
23555   @param[in] fields                 List of fields to search in (usually
23556     SELECT list)
23557   @param[in,out] all_fields         All select, group and order by fields
23558   @param[in] is_group_field         True if order is a GROUP field, false if
23559                                     ORDER by field
23560   @param[in] add_to_all_fields      If the item is to be added to all_fields and
23561                                     ref_pointer_array, this flag can be set to
23562                                     false to stop the automatic insertion.
23563   @param[in] from_window_spec       If true then order is from a window spec
23564 
23565   @retval
23566     FALSE if OK
23567   @retval
23568     TRUE  if error occurred
23569 */
23570 
23571 static bool
find_order_in_list(THD * thd,Ref_ptr_array ref_pointer_array,TABLE_LIST * tables,ORDER * order,List<Item> & fields,List<Item> & all_fields,bool is_group_field,bool add_to_all_fields,bool from_window_spec)23572 find_order_in_list(THD *thd, Ref_ptr_array ref_pointer_array,
23573                    TABLE_LIST *tables,
23574                    ORDER *order, List<Item> &fields, List<Item> &all_fields,
23575                    bool is_group_field, bool add_to_all_fields,
23576                    bool from_window_spec)
23577 {
23578   Item *order_item= *order->item; /* The item from the GROUP/ORDER caluse. */
23579   Item::Type order_item_type;
23580   Item **select_item; /* The corresponding item from the SELECT clause. */
23581   Field *from_field;  /* The corresponding field from the FROM clause. */
23582   uint counter;
23583   enum_resolution_type resolution;
23584 
23585   /*
23586     Local SP variables may be int but are expressions, not positions.
23587     (And they can't be used before fix_fields is called for them).
23588   */
23589   if (order_item->type() == Item::INT_ITEM && order_item->basic_const_item() &&
23590       !from_window_spec)
23591   {						/* Order by position */
23592     uint count;
23593     if (order->counter_used)
23594       count= order->counter; // counter was once resolved
23595     else
23596       count= (uint) order_item->val_int();
23597     if (!count || count > fields.elements)
23598     {
23599       my_error(ER_BAD_FIELD_ERROR, MYF(0),
23600                order_item->full_name(), thd->where);
23601       return TRUE;
23602     }
23603     thd->change_item_tree((Item **)&order->item, (Item *)&ref_pointer_array[count - 1]);
23604     order->in_field_list= 1;
23605     order->counter= count;
23606     order->counter_used= 1;
23607     return FALSE;
23608   }
23609   /* Lookup the current GROUP/ORDER field in the SELECT clause. */
23610   select_item= find_item_in_list(order_item, fields, &counter,
23611                                  REPORT_EXCEPT_NOT_FOUND, &resolution);
23612   if (!select_item)
23613     return TRUE; /* The item is not unique, or some other error occurred. */
23614 
23615 
23616   /* Check whether the resolved field is not ambiguos. */
23617   if (select_item != not_found_item)
23618   {
23619     Item *view_ref= NULL;
23620     /*
23621       If we have found field not by its alias in select list but by its
23622       original field name, we should additionally check if we have conflict
23623       for this name (in case if we would perform lookup in all tables).
23624     */
23625     if (resolution == RESOLVED_BEHIND_ALIAS &&
23626         order_item->fix_fields_if_needed_for_order_by(thd, order->item))
23627       return TRUE;
23628 
23629     /* Lookup the current GROUP field in the FROM clause. */
23630     order_item_type= order_item->type();
23631     from_field= (Field*) not_found_field;
23632     if ((is_group_field && order_item_type == Item::FIELD_ITEM) ||
23633         order_item_type == Item::REF_ITEM)
23634     {
23635       from_field= find_field_in_tables(thd, (Item_ident*) order_item, tables,
23636                                        NULL, &view_ref, IGNORE_ERRORS, FALSE,
23637                                        FALSE);
23638       if (!from_field)
23639         from_field= (Field*) not_found_field;
23640     }
23641 
23642     if (from_field == not_found_field ||
23643         (from_field != view_ref_found ?
23644          /* it is field of base table => check that fields are same */
23645          ((*select_item)->type() == Item::FIELD_ITEM &&
23646           ((Item_field*) (*select_item))->field->eq(from_field)) :
23647          /*
23648            in is field of view table => check that references on translation
23649            table are same
23650          */
23651          ((*select_item)->type() == Item::REF_ITEM &&
23652           view_ref->type() == Item::REF_ITEM &&
23653           ((Item_ref *) (*select_item))->ref ==
23654           ((Item_ref *) view_ref)->ref)))
23655     {
23656       /*
23657         If there is no such field in the FROM clause, or it is the same field
23658         as the one found in the SELECT clause, then use the Item created for
23659         the SELECT field. As a result if there was a derived field that
23660         'shadowed' a table field with the same name, the table field will be
23661         chosen over the derived field.
23662       */
23663       order->item= &ref_pointer_array[counter];
23664       order->in_field_list=1;
23665       return FALSE;
23666     }
23667     else
23668     {
23669       /*
23670         There is a field with the same name in the FROM clause. This
23671         is the field that will be chosen. In this case we issue a
23672         warning so the user knows that the field from the FROM clause
23673         overshadows the column reference from the SELECT list.
23674       */
23675       push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
23676                           ER_NON_UNIQ_ERROR,
23677                           ER_THD(thd, ER_NON_UNIQ_ERROR),
23678                           ((Item_ident*) order_item)->field_name.str,
23679                           thd->where);
23680     }
23681   }
23682   else if (from_window_spec)
23683   {
23684     Item **found_item= find_item_in_list(order_item, all_fields, &counter,
23685                                          REPORT_EXCEPT_NOT_FOUND, &resolution,
23686                                          all_fields.elements - fields.elements);
23687     if (found_item != not_found_item)
23688     {
23689       order->item= &ref_pointer_array[all_fields.elements-1-counter];
23690       order->in_field_list= 0;
23691       return FALSE;
23692     }
23693   }
23694 
23695   order->in_field_list=0;
23696   /*
23697     The call to order_item->fix_fields() means that here we resolve
23698     'order_item' to a column from a table in the list 'tables', or to
23699     a column in some outer query. Exactly because of the second case
23700     we come to this point even if (select_item == not_found_item),
23701     inspite of that fix_fields() calls find_item_in_list() one more
23702     time.
23703 
23704     We check order_item->fixed because Item_func_group_concat can put
23705     arguments for which fix_fields already was called.
23706   */
23707   if (order_item->fix_fields_if_needed_for_order_by(thd, order->item) ||
23708       thd->is_error())
23709     return TRUE; /* Wrong field. */
23710   order_item= *order->item; // Item can change during fix_fields()
23711 
23712   if (!add_to_all_fields)
23713     return FALSE;
23714 
23715   uint el= all_fields.elements;
23716  /* Add new field to field list. */
23717   all_fields.push_front(order_item, thd->mem_root);
23718   ref_pointer_array[el]= order_item;
23719   /*
23720      If the order_item is a SUM_FUNC_ITEM, when fix_fields is called
23721      ref_by is set to order->item which is the address of order_item.
23722      But this needs to be address of order_item in the all_fields list.
23723      As a result, when it gets replaced with Item_aggregate_ref
23724      object in Item::split_sum_func2, we will be able to retrieve the
23725      newly created object.
23726   */
23727   if (order_item->type() == Item::SUM_FUNC_ITEM)
23728     ((Item_sum *)order_item)->ref_by= all_fields.head_ref();
23729 
23730   order->item= &ref_pointer_array[el];
23731   return FALSE;
23732 }
23733 
23734 
23735 /**
23736   Change order to point at item in select list.
23737 
23738   If item isn't a number and doesn't exits in the select list, add it the
23739   the field list.
23740 */
23741 
setup_order(THD * thd,Ref_ptr_array ref_pointer_array,TABLE_LIST * tables,List<Item> & fields,List<Item> & all_fields,ORDER * order,bool from_window_spec)23742 int setup_order(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables,
23743                 List<Item> &fields, List<Item> &all_fields, ORDER *order,
23744                 bool from_window_spec)
23745 {
23746   SELECT_LEX *select = thd->lex->current_select;
23747   enum_parsing_place context_analysis_place=
23748                      thd->lex->current_select->context_analysis_place;
23749   thd->where="order clause";
23750   const bool for_union= select->master_unit()->is_unit_op() &&
23751     select == select->master_unit()->fake_select_lex;
23752   for (uint number = 1; order; order=order->next, number++)
23753   {
23754     if (find_order_in_list(thd, ref_pointer_array, tables, order, fields,
23755                            all_fields, false, true, from_window_spec))
23756       return 1;
23757     if ((*order->item)->with_window_func &&
23758         context_analysis_place != IN_ORDER_BY)
23759     {
23760       my_error(ER_WINDOW_FUNCTION_IN_WINDOW_SPEC, MYF(0));
23761       return 1;
23762     }
23763 
23764     /*
23765       UNION queries cannot be used with an aggregate function in
23766       an ORDER BY clause
23767     */
23768 
23769     if (for_union && (*order->item)->with_sum_func)
23770     {
23771       my_error(ER_AGGREGATE_ORDER_FOR_UNION, MYF(0), number);
23772       return 1;
23773     }
23774 
23775     if (from_window_spec && (*order->item)->with_sum_func &&
23776         (*order->item)->type() != Item::SUM_FUNC_ITEM)
23777       (*order->item)->split_sum_func(thd, ref_pointer_array,
23778                                      all_fields, SPLIT_SUM_SELECT);
23779   }
23780   return 0;
23781 }
23782 
23783 
23784 /**
23785   Intitialize the GROUP BY list.
23786 
23787   @param thd		       Thread handler
23788   @param ref_pointer_array     We store references to all fields that was
23789                                not in 'fields' here.
23790   @param fields		       All fields in the select part. Any item in
23791                                'order' that is part of these list is replaced
23792                                by a pointer to this fields.
23793   @param all_fields	       Total list of all unique fields used by the
23794                                select. All items in 'order' that was not part
23795                                of fields will be added first to this list.
23796   @param order		       The fields we should do GROUP/PARTITION BY on
23797   @param hidden_group_fields   Pointer to flag that is set to 1 if we added
23798                                any fields to all_fields.
23799   @param from_window_spec      If true then list is from a window spec
23800 
23801   @todo
23802     change ER_WRONG_FIELD_WITH_GROUP to more detailed
23803     ER_NON_GROUPING_FIELD_USED
23804 
23805   @retval
23806     0  ok
23807   @retval
23808     1  error (probably out of memory)
23809 */
23810 
23811 int
setup_group(THD * thd,Ref_ptr_array ref_pointer_array,TABLE_LIST * tables,List<Item> & fields,List<Item> & all_fields,ORDER * order,bool * hidden_group_fields,bool from_window_spec)23812 setup_group(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables,
23813 	    List<Item> &fields, List<Item> &all_fields, ORDER *order,
23814 	    bool *hidden_group_fields, bool from_window_spec)
23815 {
23816   enum_parsing_place context_analysis_place=
23817                      thd->lex->current_select->context_analysis_place;
23818   *hidden_group_fields=0;
23819   ORDER *ord;
23820 
23821   if (!order)
23822     return 0;				/* Everything is ok */
23823 
23824   uint org_fields=all_fields.elements;
23825 
23826   thd->where="group statement";
23827   for (ord= order; ord; ord= ord->next)
23828   {
23829     if (find_order_in_list(thd, ref_pointer_array, tables, ord, fields,
23830                            all_fields, true, true, from_window_spec))
23831       return 1;
23832     (*ord->item)->marker= UNDEF_POS;		/* Mark found */
23833     if ((*ord->item)->with_sum_func && context_analysis_place == IN_GROUP_BY)
23834     {
23835       my_error(ER_WRONG_GROUP_FIELD, MYF(0), (*ord->item)->full_name());
23836       return 1;
23837     }
23838     if ((*ord->item)->with_window_func)
23839     {
23840       if (context_analysis_place == IN_GROUP_BY)
23841         my_error(ER_WRONG_PLACEMENT_OF_WINDOW_FUNCTION, MYF(0));
23842       else
23843         my_error(ER_WINDOW_FUNCTION_IN_WINDOW_SPEC, MYF(0));
23844       return 1;
23845     }
23846     if (from_window_spec && (*ord->item)->with_sum_func &&
23847         (*ord->item)->type() != Item::SUM_FUNC_ITEM)
23848       (*ord->item)->split_sum_func(thd, ref_pointer_array,
23849                                    all_fields, SPLIT_SUM_SELECT);
23850   }
23851   if (thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY &&
23852       context_analysis_place == IN_GROUP_BY)
23853   {
23854     /*
23855       Don't allow one to use fields that is not used in GROUP BY
23856       For each select a list of field references that aren't under an
23857       aggregate function is created. Each field in this list keeps the
23858       position of the select list expression which it belongs to.
23859 
23860       First we check an expression from the select list against the GROUP BY
23861       list. If it's found there then it's ok. It's also ok if this expression
23862       is a constant or an aggregate function. Otherwise we scan the list
23863       of non-aggregated fields and if we'll find at least one field reference
23864       that belongs to this expression and doesn't occur in the GROUP BY list
23865       we throw an error. If there are no fields in the created list for a
23866       select list expression this means that all fields in it are used under
23867       aggregate functions.
23868     */
23869     Item *item;
23870     Item_field *field;
23871     int cur_pos_in_select_list= 0;
23872     List_iterator<Item> li(fields);
23873     List_iterator<Item_field> naf_it(thd->lex->current_select->join->non_agg_fields);
23874 
23875     field= naf_it++;
23876     while (field && (item=li++))
23877     {
23878       if (item->type() != Item::SUM_FUNC_ITEM && item->marker >= 0 &&
23879           !item->const_item() &&
23880           !(item->real_item()->type() == Item::FIELD_ITEM &&
23881             item->used_tables() & OUTER_REF_TABLE_BIT))
23882       {
23883         while (field)
23884         {
23885           /* Skip fields from previous expressions. */
23886           if (field->marker < cur_pos_in_select_list)
23887             goto next_field;
23888           /* Found a field from the next expression. */
23889           if (field->marker > cur_pos_in_select_list)
23890             break;
23891           /*
23892             Check whether the field occur in the GROUP BY list.
23893             Throw the error later if the field isn't found.
23894           */
23895           for (ord= order; ord; ord= ord->next)
23896             if ((*ord->item)->eq((Item*)field, 0))
23897               goto next_field;
23898           /*
23899             TODO: change ER_WRONG_FIELD_WITH_GROUP to more detailed
23900             ER_NON_GROUPING_FIELD_USED
23901           */
23902           my_error(ER_WRONG_FIELD_WITH_GROUP, MYF(0), field->full_name());
23903           return 1;
23904 next_field:
23905           field= naf_it++;
23906         }
23907       }
23908       cur_pos_in_select_list++;
23909     }
23910   }
23911   if (org_fields != all_fields.elements)
23912     *hidden_group_fields=1;			// group fields is not used
23913   return 0;
23914 }
23915 
23916 /**
23917   Add fields with aren't used at start of field list.
23918 
23919   @return
23920     FALSE if ok
23921 */
23922 
23923 static bool
setup_new_fields(THD * thd,List<Item> & fields,List<Item> & all_fields,ORDER * new_field)23924 setup_new_fields(THD *thd, List<Item> &fields,
23925 		 List<Item> &all_fields, ORDER *new_field)
23926 {
23927   Item	  **item;
23928   uint counter;
23929   enum_resolution_type not_used;
23930   DBUG_ENTER("setup_new_fields");
23931 
23932   thd->column_usage= MARK_COLUMNS_READ;       // Not really needed, but...
23933   for (; new_field ; new_field= new_field->next)
23934   {
23935     if ((item= find_item_in_list(*new_field->item, fields, &counter,
23936 				 IGNORE_ERRORS, &not_used)))
23937       new_field->item=item;			/* Change to shared Item */
23938     else
23939     {
23940       thd->where="procedure list";
23941       if ((*new_field->item)->fix_fields(thd, new_field->item))
23942 	DBUG_RETURN(1); /* purecov: inspected */
23943       all_fields.push_front(*new_field->item, thd->mem_root);
23944       new_field->item=all_fields.head_ref();
23945     }
23946   }
23947   DBUG_RETURN(0);
23948 }
23949 
23950 /**
23951   Create a group by that consist of all non const fields.
23952 
23953   Try to use the fields in the order given by 'order' to allow one to
23954   optimize away 'order by'.
23955 
23956   @retval
23957     0 OOM error if thd->is_fatal_error is set. Otherwise group was eliminated
23958     # Pointer to new group
23959 */
23960 
23961 ORDER *
create_distinct_group(THD * thd,Ref_ptr_array ref_pointer_array,ORDER * order_list,List<Item> & fields,List<Item> & all_fields,bool * all_order_by_fields_used)23962 create_distinct_group(THD *thd, Ref_ptr_array ref_pointer_array,
23963                       ORDER *order_list, List<Item> &fields,
23964                       List<Item> &all_fields,
23965 		      bool *all_order_by_fields_used)
23966 {
23967   List_iterator<Item> li(fields);
23968   Item *item;
23969   Ref_ptr_array orig_ref_pointer_array= ref_pointer_array;
23970   ORDER *order,*group,**prev;
23971   uint idx= 0;
23972 
23973   *all_order_by_fields_used= 1;
23974   while ((item=li++))
23975     item->marker=0;			/* Marker that field is not used */
23976 
23977   prev= &group;  group=0;
23978   for (order=order_list ; order; order=order->next)
23979   {
23980     if (order->in_field_list)
23981     {
23982       ORDER *ord=(ORDER*) thd->memdup((char*) order,sizeof(ORDER));
23983       if (!ord)
23984 	return 0;
23985       *prev=ord;
23986       prev= &ord->next;
23987       (*ord->item)->marker=1;
23988     }
23989     else
23990       *all_order_by_fields_used= 0;
23991   }
23992 
23993   li.rewind();
23994   while ((item=li++))
23995   {
23996     if (!item->const_item() && !item->with_sum_func && !item->marker)
23997     {
23998       /*
23999         Don't put duplicate columns from the SELECT list into the
24000         GROUP BY list.
24001       */
24002       ORDER *ord_iter;
24003       for (ord_iter= group; ord_iter; ord_iter= ord_iter->next)
24004         if ((*ord_iter->item)->eq(item, 1))
24005           goto next_item;
24006 
24007       ORDER *ord=(ORDER*) thd->calloc(sizeof(ORDER));
24008       if (!ord)
24009 	return 0;
24010 
24011       if (item->type() == Item::FIELD_ITEM &&
24012           item->field_type() == MYSQL_TYPE_BIT)
24013       {
24014         /*
24015           Because HEAP tables can't index BIT fields we need to use an
24016           additional hidden field for grouping because later it will be
24017           converted to a LONG field. Original field will remain of the
24018           BIT type and will be returned [el]client.
24019         */
24020         Item_field *new_item= new (thd->mem_root) Item_field(thd, (Item_field*)item);
24021         if (!new_item)
24022           return 0;
24023         int el= all_fields.elements;
24024         orig_ref_pointer_array[el]= new_item;
24025         all_fields.push_front(new_item, thd->mem_root);
24026         ord->item=&orig_ref_pointer_array[el];
24027      }
24028       else
24029       {
24030         /*
24031           We have here only field_list (not all_field_list), so we can use
24032           simple indexing of ref_pointer_array (order in the array and in the
24033           list are same)
24034         */
24035         ord->item= &ref_pointer_array[idx];
24036       }
24037       ord->direction= ORDER::ORDER_ASC;
24038       *prev=ord;
24039       prev= &ord->next;
24040     }
24041 next_item:
24042     idx++;
24043   }
24044   *prev=0;
24045   return group;
24046 }
24047 
24048 
24049 /**
24050   Update join with count of the different type of fields.
24051 */
24052 
24053 void
count_field_types(SELECT_LEX * select_lex,TMP_TABLE_PARAM * param,List<Item> & fields,bool reset_with_sum_func)24054 count_field_types(SELECT_LEX *select_lex, TMP_TABLE_PARAM *param,
24055                   List<Item> &fields, bool reset_with_sum_func)
24056 {
24057   List_iterator<Item> li(fields);
24058   Item *field;
24059 
24060   param->field_count=param->sum_func_count=param->func_count=
24061     param->hidden_field_count=0;
24062   param->quick_group=1;
24063   while ((field=li++))
24064   {
24065     Item::Type real_type= field->real_item()->type();
24066     if (real_type == Item::FIELD_ITEM)
24067       param->field_count++;
24068     else if (real_type == Item::SUM_FUNC_ITEM)
24069     {
24070       if (! field->const_item())
24071       {
24072 	Item_sum *sum_item=(Item_sum*) field->real_item();
24073         if (!sum_item->depended_from() ||
24074             sum_item->depended_from() == select_lex)
24075         {
24076           if (!sum_item->quick_group)
24077             param->quick_group=0;			// UDF SUM function
24078           param->sum_func_count++;
24079 
24080           for (uint i=0 ; i < sum_item->get_arg_count() ; i++)
24081           {
24082             if (sum_item->get_arg(i)->real_item()->type() == Item::FIELD_ITEM)
24083               param->field_count++;
24084             else
24085               param->func_count++;
24086           }
24087         }
24088         param->func_count++;
24089       }
24090     }
24091     else
24092     {
24093       param->func_count++;
24094       if (reset_with_sum_func)
24095 	field->with_sum_func=0;
24096     }
24097   }
24098 }
24099 
24100 
24101 /**
24102   Return 1 if second is a subpart of first argument.
24103 
24104   If first parts has different direction, change it to second part
24105   (group is sorted like order)
24106 */
24107 
24108 static bool
test_if_subpart(ORDER * a,ORDER * b)24109 test_if_subpart(ORDER *a,ORDER *b)
24110 {
24111   for (; a && b; a=a->next,b=b->next)
24112   {
24113     if ((*a->item)->eq(*b->item,1))
24114       a->direction=b->direction;
24115     else
24116       return 0;
24117   }
24118   return MY_TEST(!b);
24119 }
24120 
24121 /**
24122   Return table number if there is only one table in sort order
24123   and group and order is compatible, else return 0.
24124 */
24125 
24126 static TABLE *
get_sort_by_table(ORDER * a,ORDER * b,List<TABLE_LIST> & tables,table_map const_tables)24127 get_sort_by_table(ORDER *a,ORDER *b, List<TABLE_LIST> &tables,
24128                   table_map const_tables)
24129 {
24130   TABLE_LIST *table;
24131   List_iterator<TABLE_LIST> ti(tables);
24132   table_map map= (table_map) 0;
24133   DBUG_ENTER("get_sort_by_table");
24134 
24135   if (!a)
24136     a=b;					// Only one need to be given
24137   else if (!b)
24138     b=a;
24139 
24140   for (; a && b; a=a->next,b=b->next)
24141   {
24142     /* Skip elements of a that are constant */
24143     while (!((*a->item)->used_tables() & ~const_tables))
24144     {
24145       if (!(a= a->next))
24146         break;
24147     }
24148 
24149     /* Skip elements of b that are constant */
24150     while (!((*b->item)->used_tables() & ~const_tables))
24151     {
24152       if (!(b= b->next))
24153         break;
24154     }
24155 
24156     if (!a || !b)
24157       break;
24158 
24159     if (!(*a->item)->eq(*b->item,1))
24160       DBUG_RETURN(0);
24161     map|=a->item[0]->used_tables();
24162   }
24163   if (!map || (map & (RAND_TABLE_BIT | OUTER_REF_TABLE_BIT)))
24164     DBUG_RETURN(0);
24165 
24166   map&= ~const_tables;
24167   while ((table= ti++) && !(map & table->table->map)) ;
24168   if (map != table->table->map)
24169     DBUG_RETURN(0);				// More than one table
24170   DBUG_PRINT("exit",("sort by table: %d",table->table->tablenr));
24171   DBUG_RETURN(table->table);
24172 }
24173 
24174 
24175 /**
24176   calc how big buffer we need for comparing group entries.
24177 */
24178 
calc_group_buffer(TMP_TABLE_PARAM * param,ORDER * group)24179 void calc_group_buffer(TMP_TABLE_PARAM *param, ORDER *group)
24180 {
24181   uint key_length=0, parts=0, null_parts=0;
24182 
24183   for (; group ; group=group->next)
24184   {
24185     Item *group_item= *group->item;
24186     Field *field= group_item->get_tmp_table_field();
24187     if (field)
24188     {
24189       enum_field_types type;
24190       if ((type= field->type()) == MYSQL_TYPE_BLOB)
24191 	key_length+=MAX_BLOB_WIDTH;		// Can't be used as a key
24192       else if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_VAR_STRING)
24193         key_length+= field->field_length + HA_KEY_BLOB_LENGTH;
24194       else if (type == MYSQL_TYPE_BIT)
24195       {
24196         /* Bit is usually stored as a longlong key for group fields */
24197         key_length+= 8;                         // Big enough
24198       }
24199       else
24200 	key_length+= field->pack_length();
24201     }
24202     else
24203     {
24204       switch (group_item->cmp_type()) {
24205       case REAL_RESULT:
24206         key_length+= sizeof(double);
24207         break;
24208       case INT_RESULT:
24209         key_length+= sizeof(longlong);
24210         break;
24211       case DECIMAL_RESULT:
24212         key_length+= my_decimal_get_binary_size(group_item->max_length -
24213                                                 (group_item->decimals ? 1 : 0),
24214                                                 group_item->decimals);
24215         break;
24216       case TIME_RESULT:
24217       {
24218         /*
24219           As items represented as DATE/TIME fields in the group buffer
24220           have STRING_RESULT result type, we increase the length
24221           by 8 as maximum pack length of such fields.
24222         */
24223         key_length+= 8;
24224         break;
24225       }
24226       case STRING_RESULT:
24227       {
24228         enum enum_field_types type= group_item->field_type();
24229         if (type == MYSQL_TYPE_BLOB)
24230           key_length+= MAX_BLOB_WIDTH;		// Can't be used as a key
24231         else
24232         {
24233           /*
24234             Group strings are taken as varstrings and require an length field.
24235             A field is not yet created by create_tmp_field()
24236             and the sizes should match up.
24237           */
24238           key_length+= group_item->max_length + HA_KEY_BLOB_LENGTH;
24239         }
24240         break;
24241       }
24242       default:
24243         /* This case should never be choosen */
24244         DBUG_ASSERT(0);
24245         my_error(ER_OUT_OF_RESOURCES, MYF(ME_FATALERROR));
24246       }
24247     }
24248     parts++;
24249     if (group_item->maybe_null)
24250       null_parts++;
24251   }
24252   param->group_length= key_length + null_parts;
24253   param->group_parts= parts;
24254   param->group_null_parts= null_parts;
24255 }
24256 
calc_group_buffer(JOIN * join,ORDER * group)24257 static void calc_group_buffer(JOIN *join, ORDER *group)
24258 {
24259   if (group)
24260     join->group= 1;
24261   calc_group_buffer(&join->tmp_table_param, group);
24262 }
24263 
24264 
24265 /**
24266   allocate group fields or take prepared (cached).
24267 
24268   @param main_join   join of current select
24269   @param curr_join   current join (join of current select or temporary copy
24270                      of it)
24271 
24272   @retval
24273     0   ok
24274   @retval
24275     1   failed
24276 */
24277 
24278 static bool
make_group_fields(JOIN * main_join,JOIN * curr_join)24279 make_group_fields(JOIN *main_join, JOIN *curr_join)
24280 {
24281   if (main_join->group_fields_cache.elements)
24282   {
24283     curr_join->group_fields= main_join->group_fields_cache;
24284     curr_join->sort_and_group= 1;
24285   }
24286   else
24287   {
24288     if (alloc_group_fields(curr_join, curr_join->group_list))
24289       return (1);
24290     main_join->group_fields_cache= curr_join->group_fields;
24291   }
24292   return (0);
24293 }
24294 
24295 
24296 /**
24297   Get a list of buffers for saving last group.
24298 
24299   Groups are saved in reverse order for easier check loop.
24300 */
24301 
24302 static bool
alloc_group_fields(JOIN * join,ORDER * group)24303 alloc_group_fields(JOIN *join,ORDER *group)
24304 {
24305   if (group)
24306   {
24307     for (; group ; group=group->next)
24308     {
24309       Cached_item *tmp=new_Cached_item(join->thd, *group->item, TRUE);
24310       if (!tmp || join->group_fields.push_front(tmp))
24311 	return TRUE;
24312     }
24313   }
24314   join->sort_and_group=1;			/* Mark for do_select */
24315   return FALSE;
24316 }
24317 
24318 
24319 
24320 /*
24321   Test if a single-row cache of items changed, and update the cache.
24322 
24323   @details Test if a list of items that typically represents a result
24324   row has changed. If the value of some item changed, update the cached
24325   value for this item.
24326 
24327   @param list list of <item, cached_value> pairs stored as Cached_item.
24328 
24329   @return -1 if no item changed
24330   @return index of the first item that changed
24331 */
24332 
test_if_item_cache_changed(List<Cached_item> & list)24333 int test_if_item_cache_changed(List<Cached_item> &list)
24334 {
24335   DBUG_ENTER("test_if_item_cache_changed");
24336   List_iterator<Cached_item> li(list);
24337   int idx= -1,i;
24338   Cached_item *buff;
24339 
24340   for (i=(int) list.elements-1 ; (buff=li++) ; i--)
24341   {
24342     if (buff->cmp())
24343       idx=i;
24344   }
24345   DBUG_PRINT("info", ("idx: %d", idx));
24346   DBUG_RETURN(idx);
24347 }
24348 
24349 
24350 /*
24351   @return
24352     -1         - Group not changed
24353    value>=0    - Number of the component where the group changed
24354 */
24355 
24356 int
test_if_group_changed(List<Cached_item> & list)24357 test_if_group_changed(List<Cached_item> &list)
24358 {
24359   DBUG_ENTER("test_if_group_changed");
24360   List_iterator<Cached_item> li(list);
24361   int idx= -1,i;
24362   Cached_item *buff;
24363 
24364   for (i=(int) list.elements-1 ; (buff=li++) ; i--)
24365   {
24366     if (buff->cmp())
24367       idx=i;
24368   }
24369   DBUG_PRINT("info", ("idx: %d", idx));
24370   DBUG_RETURN(idx);
24371 }
24372 
24373 
24374 /**
24375   Setup copy_fields to save fields at start of new group.
24376 
24377   Setup copy_fields to save fields at start of new group
24378 
24379   Only FIELD_ITEM:s and FUNC_ITEM:s needs to be saved between groups.
24380   Change old item_field to use a new field with points at saved fieldvalue
24381   This function is only called before use of send_result_set_metadata.
24382 
24383   @param thd                   THD pointer
24384   @param param                 temporary table parameters
24385   @param ref_pointer_array     array of pointers to top elements of filed list
24386   @param res_selected_fields   new list of items of select item list
24387   @param res_all_fields        new list of all items
24388   @param elements              number of elements in select item list
24389   @param all_fields            all fields list
24390 
24391   @todo
24392     In most cases this result will be sent to the user.
24393     This should be changed to use copy_int or copy_real depending
24394     on how the value is to be used: In some cases this may be an
24395     argument in a group function, like: IF(ISNULL(col),0,COUNT(*))
24396 
24397   @retval
24398     0     ok
24399   @retval
24400     !=0   error
24401 */
24402 
24403 bool
setup_copy_fields(THD * thd,TMP_TABLE_PARAM * param,Ref_ptr_array ref_pointer_array,List<Item> & res_selected_fields,List<Item> & res_all_fields,uint elements,List<Item> & all_fields)24404 setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
24405 		  Ref_ptr_array ref_pointer_array,
24406 		  List<Item> &res_selected_fields, List<Item> &res_all_fields,
24407 		  uint elements, List<Item> &all_fields)
24408 {
24409   Item *pos;
24410   List_iterator_fast<Item> li(all_fields);
24411   Copy_field *copy= NULL;
24412   Copy_field *copy_start __attribute__((unused));
24413   res_selected_fields.empty();
24414   res_all_fields.empty();
24415   List_iterator_fast<Item> itr(res_all_fields);
24416   List<Item> extra_funcs;
24417   uint i, border= all_fields.elements - elements;
24418   DBUG_ENTER("setup_copy_fields");
24419 
24420   if (param->field_count &&
24421       !(copy=param->copy_field= new (thd->mem_root) Copy_field[param->field_count]))
24422     goto err2;
24423 
24424   param->copy_funcs.empty();
24425   copy_start= copy;
24426   for (i= 0; (pos= li++); i++)
24427   {
24428     Field *field;
24429     uchar *tmp;
24430     Item *real_pos= pos->real_item();
24431     /*
24432       Aggregate functions can be substituted for fields (by e.g. temp tables).
24433       We need to filter those substituted fields out.
24434     */
24435     if (real_pos->type() == Item::FIELD_ITEM &&
24436         !(real_pos != pos &&
24437           ((Item_ref *)pos)->ref_type() == Item_ref::AGGREGATE_REF))
24438     {
24439       Item_field *item;
24440       if (!(item= new (thd->mem_root) Item_field(thd, ((Item_field*) real_pos))))
24441 	goto err;
24442       if (pos->type() == Item::REF_ITEM)
24443       {
24444         /* preserve the names of the ref when dereferncing */
24445         Item_ref *ref= (Item_ref *) pos;
24446         item->db_name= ref->db_name;
24447         item->table_name= ref->table_name;
24448         item->name= ref->name;
24449       }
24450       pos= item;
24451       if (item->field->flags & BLOB_FLAG)
24452       {
24453 	if (!(pos= new (thd->mem_root) Item_copy_string(thd, pos)))
24454 	  goto err;
24455        /*
24456          Item_copy_string::copy for function can call
24457          Item_copy_string::val_int for blob via Item_ref.
24458          But if Item_copy_string::copy for blob isn't called before,
24459          it's value will be wrong
24460          so let's insert Item_copy_string for blobs in the beginning of
24461          copy_funcs
24462          (to see full test case look at having.test, BUG #4358)
24463        */
24464 	if (param->copy_funcs.push_front(pos, thd->mem_root))
24465 	  goto err;
24466       }
24467       else
24468       {
24469 	/*
24470 	   set up save buffer and change result_field to point at
24471 	   saved value
24472 	*/
24473 	field= item->field;
24474 	item->result_field=field->make_new_field(thd->mem_root,
24475                                                  field->table, 1);
24476         /*
24477           We need to allocate one extra byte for null handling and
24478           another extra byte to not get warnings from purify in
24479           Field_string::val_int
24480         */
24481 	if (!(tmp= (uchar*) thd->alloc(field->pack_length()+2)))
24482 	  goto err;
24483         if (copy)
24484         {
24485           DBUG_ASSERT (param->field_count > (uint) (copy - copy_start));
24486           copy->set(tmp, item->result_field);
24487           item->result_field->move_field(copy->to_ptr,copy->to_null_ptr,1);
24488 #ifdef HAVE_valgrind
24489           copy->to_ptr[copy->from_length]= 0;
24490 #endif
24491           copy++;
24492         }
24493       }
24494     }
24495     else if ((real_pos->type() == Item::FUNC_ITEM ||
24496 	      real_pos->real_type() == Item::SUBSELECT_ITEM ||
24497 	      real_pos->type() == Item::CACHE_ITEM ||
24498 	      real_pos->type() == Item::COND_ITEM) &&
24499 	     !real_pos->with_sum_func)
24500     {						// Save for send fields
24501       LEX_CSTRING real_name= pos->name;
24502       pos= real_pos;
24503       pos->name= real_name;
24504       /* TODO:
24505 	 In most cases this result will be sent to the user.
24506 	 This should be changed to use copy_int or copy_real depending
24507 	 on how the value is to be used: In some cases this may be an
24508 	 argument in a group function, like: IF(ISNULL(col),0,COUNT(*))
24509       */
24510       if (!(pos=new (thd->mem_root) Item_copy_string(thd, pos)))
24511 	goto err;
24512       if (i < border)                           // HAVING, ORDER and GROUP BY
24513       {
24514         if (extra_funcs.push_back(pos, thd->mem_root))
24515           goto err;
24516       }
24517       else if (param->copy_funcs.push_back(pos, thd->mem_root))
24518 	goto err;
24519     }
24520     res_all_fields.push_back(pos, thd->mem_root);
24521     ref_pointer_array[((i < border)? all_fields.elements-i-1 : i-border)]=
24522       pos;
24523   }
24524   param->copy_field_end= copy;
24525 
24526   for (i= 0; i < border; i++)
24527     itr++;
24528   itr.sublist(res_selected_fields, elements);
24529   /*
24530     Put elements from HAVING, ORDER BY and GROUP BY last to ensure that any
24531     reference used in these will resolve to a item that is already calculated
24532   */
24533   param->copy_funcs.append(&extra_funcs);
24534 
24535   DBUG_RETURN(0);
24536 
24537  err:
24538   if (copy)
24539     delete [] param->copy_field;			// This is never 0
24540   param->copy_field= 0;
24541 err2:
24542   DBUG_RETURN(TRUE);
24543 }
24544 
24545 
24546 /**
24547   Make a copy of all simple SELECT'ed items.
24548 
24549   This is done at the start of a new group so that we can retrieve
24550   these later when the group changes.
24551 */
24552 
24553 void
copy_fields(TMP_TABLE_PARAM * param)24554 copy_fields(TMP_TABLE_PARAM *param)
24555 {
24556   Copy_field *ptr=param->copy_field;
24557   Copy_field *end=param->copy_field_end;
24558 
24559   DBUG_ASSERT((ptr != NULL && end >= ptr) || (ptr == NULL && end == NULL));
24560 
24561   for (; ptr != end; ptr++)
24562     (*ptr->do_copy)(ptr);
24563 
24564   List_iterator_fast<Item> it(param->copy_funcs);
24565   Item_copy_string *item;
24566   while ((item = (Item_copy_string*) it++))
24567     item->copy();
24568 }
24569 
24570 
24571 /**
24572   Make an array of pointers to sum_functions to speed up
24573   sum_func calculation.
24574 
24575   @retval
24576     0	ok
24577   @retval
24578     1	Error
24579 */
24580 
alloc_func_list()24581 bool JOIN::alloc_func_list()
24582 {
24583   uint func_count, group_parts;
24584   DBUG_ENTER("alloc_func_list");
24585 
24586   func_count= tmp_table_param.sum_func_count;
24587   /*
24588     If we are using rollup, we need a copy of the summary functions for
24589     each level
24590   */
24591   if (rollup.state != ROLLUP::STATE_NONE)
24592     func_count*= (send_group_parts+1);
24593 
24594   group_parts= send_group_parts;
24595   /*
24596     If distinct, reserve memory for possible
24597     disctinct->group_by optimization
24598   */
24599   if (select_distinct)
24600   {
24601     group_parts+= fields_list.elements;
24602     /*
24603       If the ORDER clause is specified then it's possible that
24604       it also will be optimized, so reserve space for it too
24605     */
24606     if (order)
24607     {
24608       ORDER *ord;
24609       for (ord= order; ord; ord= ord->next)
24610         group_parts++;
24611     }
24612   }
24613 
24614   /* This must use calloc() as rollup_make_fields depends on this */
24615   sum_funcs= (Item_sum**) thd->calloc(sizeof(Item_sum**) * (func_count+1) +
24616 				      sizeof(Item_sum***) * (group_parts+1));
24617   sum_funcs_end= (Item_sum***) (sum_funcs+func_count+1);
24618   DBUG_RETURN(sum_funcs == 0);
24619 }
24620 
24621 
24622 /**
24623   Initialize 'sum_funcs' array with all Item_sum objects.
24624 
24625   @param field_list        All items
24626   @param send_result_set_metadata       Items in select list
24627   @param before_group_by   Set to 1 if this is called before GROUP BY handling
24628   @param recompute         Set to TRUE if sum_funcs must be recomputed
24629 
24630   @retval
24631     0  ok
24632   @retval
24633     1  error
24634 */
24635 
make_sum_func_list(List<Item> & field_list,List<Item> & send_result_set_metadata,bool before_group_by,bool recompute)24636 bool JOIN::make_sum_func_list(List<Item> &field_list,
24637                               List<Item> &send_result_set_metadata,
24638 			      bool before_group_by, bool recompute)
24639 {
24640   List_iterator_fast<Item> it(field_list);
24641   Item_sum **func;
24642   Item *item;
24643   DBUG_ENTER("make_sum_func_list");
24644 
24645   if (*sum_funcs && !recompute)
24646     DBUG_RETURN(FALSE); /* We have already initialized sum_funcs. */
24647 
24648   func= sum_funcs;
24649   while ((item=it++))
24650   {
24651     if (item->type() == Item::SUM_FUNC_ITEM && !item->const_item() &&
24652         (!((Item_sum*) item)->depended_from() ||
24653          ((Item_sum *)item)->depended_from() == select_lex))
24654       *func++= (Item_sum*) item;
24655   }
24656   if (before_group_by && rollup.state == ROLLUP::STATE_INITED)
24657   {
24658     rollup.state= ROLLUP::STATE_READY;
24659     if (rollup_make_fields(field_list, send_result_set_metadata, &func))
24660       DBUG_RETURN(TRUE);			// Should never happen
24661   }
24662   else if (rollup.state == ROLLUP::STATE_NONE)
24663   {
24664     for (uint i=0 ; i <= send_group_parts ;i++)
24665       sum_funcs_end[i]= func;
24666   }
24667   else if (rollup.state == ROLLUP::STATE_READY)
24668     DBUG_RETURN(FALSE);                         // Don't put end marker
24669   *func=0;					// End marker
24670   DBUG_RETURN(FALSE);
24671 }
24672 
24673 
24674 /**
24675   Change all funcs and sum_funcs to fields in tmp table, and create
24676   new list of all items.
24677 
24678   @param thd                   THD pointer
24679   @param ref_pointer_array     array of pointers to top elements of filed list
24680   @param res_selected_fields   new list of items of select item list
24681   @param res_all_fields        new list of all items
24682   @param elements              number of elements in select item list
24683   @param all_fields            all fields list
24684 
24685   @retval
24686     0     ok
24687   @retval
24688     !=0   error
24689 */
24690 
24691 static bool
change_to_use_tmp_fields(THD * thd,Ref_ptr_array ref_pointer_array,List<Item> & res_selected_fields,List<Item> & res_all_fields,uint elements,List<Item> & all_fields)24692 change_to_use_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array,
24693 			 List<Item> &res_selected_fields,
24694 			 List<Item> &res_all_fields,
24695 			 uint elements, List<Item> &all_fields)
24696 {
24697   List_iterator_fast<Item> it(all_fields);
24698   Item *item_field,*item;
24699   DBUG_ENTER("change_to_use_tmp_fields");
24700 
24701   res_selected_fields.empty();
24702   res_all_fields.empty();
24703 
24704   uint border= all_fields.elements - elements;
24705   for (uint i= 0; (item= it++); i++)
24706   {
24707     Field *field;
24708     if ((item->with_sum_func && item->type() != Item::SUM_FUNC_ITEM) ||
24709        item->with_window_func)
24710       item_field= item;
24711     else if (item->type() == Item::FIELD_ITEM)
24712     {
24713       if (!(item_field= item->get_tmp_table_item(thd)))
24714         DBUG_RETURN(true);
24715     }
24716     else if (item->type() == Item::FUNC_ITEM &&
24717              ((Item_func*)item)->functype() == Item_func::SUSERVAR_FUNC)
24718     {
24719       field= item->get_tmp_table_field();
24720       if (field != NULL)
24721       {
24722         /*
24723           Replace "@:=<expression>" with "@:=<tmp table
24724           column>". Otherwise, we would re-evaluate <expression>, and
24725           if expression were a subquery, this would access
24726           already-unlocked tables.
24727          */
24728         Item_func_set_user_var* suv=
24729           new (thd->mem_root) Item_func_set_user_var(thd, (Item_func_set_user_var*) item);
24730         Item_field *new_field= new (thd->mem_root) Item_temptable_field(thd, field);
24731         if (!suv || !new_field)
24732           DBUG_RETURN(true);                  // Fatal error
24733         List<Item> list;
24734         list.push_back(new_field, thd->mem_root);
24735         suv->set_arguments(thd, list);
24736         item_field= suv;
24737       }
24738       else
24739         item_field= item;
24740     }
24741     else if ((field= item->get_tmp_table_field()))
24742     {
24743       if (item->type() == Item::SUM_FUNC_ITEM && field->table->group)
24744         item_field= ((Item_sum*) item)->result_item(thd, field);
24745       else
24746         item_field= (Item *) new (thd->mem_root) Item_temptable_field(thd, field);
24747       if (!item_field)
24748         DBUG_RETURN(true);                    // Fatal error
24749 
24750       if (item->real_item()->type() != Item::FIELD_ITEM)
24751         field->orig_table= 0;
24752       item_field->name= item->name;
24753       if (item->type() == Item::REF_ITEM)
24754       {
24755         Item_field *ifield= (Item_field *) item_field;
24756         Item_ref *iref= (Item_ref *) item;
24757         ifield->table_name= iref->table_name;
24758         ifield->db_name= iref->db_name;
24759       }
24760 #ifndef DBUG_OFF
24761       if (!item_field->name.str)
24762       {
24763         char buff[256];
24764         String str(buff,sizeof(buff),&my_charset_bin);
24765         str.length(0);
24766         str.extra_allocation(1024);
24767         item->print(&str, QT_ORDINARY);
24768         item_field->name.str= thd->strmake(str.ptr(), str.length());
24769         item_field->name.length= str.length();
24770       }
24771 #endif
24772     }
24773     else
24774       item_field= item;
24775 
24776     res_all_fields.push_back(item_field, thd->mem_root);
24777     ref_pointer_array[((i < border)? all_fields.elements-i-1 : i-border)]=
24778       item_field;
24779   }
24780 
24781   List_iterator_fast<Item> itr(res_all_fields);
24782   for (uint i= 0; i < border; i++)
24783     itr++;
24784   itr.sublist(res_selected_fields, elements);
24785   DBUG_RETURN(false);
24786 }
24787 
24788 
24789 /**
24790   Change all sum_func refs to fields to point at fields in tmp table.
24791   Change all funcs to be fields in tmp table.
24792 
24793   @param thd                   THD pointer
24794   @param ref_pointer_array     array of pointers to top elements of filed list
24795   @param res_selected_fields   new list of items of select item list
24796   @param res_all_fields        new list of all items
24797   @param elements              number of elements in select item list
24798   @param all_fields            all fields list
24799 
24800   @retval
24801     0	ok
24802   @retval
24803     1	error
24804 */
24805 
24806 static bool
change_refs_to_tmp_fields(THD * thd,Ref_ptr_array ref_pointer_array,List<Item> & res_selected_fields,List<Item> & res_all_fields,uint elements,List<Item> & all_fields)24807 change_refs_to_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array,
24808 			  List<Item> &res_selected_fields,
24809 			  List<Item> &res_all_fields, uint elements,
24810 			  List<Item> &all_fields)
24811 {
24812   List_iterator_fast<Item> it(all_fields);
24813   Item *item, *new_item;
24814   res_selected_fields.empty();
24815   res_all_fields.empty();
24816 
24817   uint i, border= all_fields.elements - elements;
24818   for (i= 0; (item= it++); i++)
24819   {
24820     if (item->type() == Item::SUM_FUNC_ITEM && item->const_item())
24821       new_item= item;
24822     else
24823     {
24824       if (!(new_item= item->get_tmp_table_item(thd)))
24825         return 1;
24826     }
24827 
24828     if (res_all_fields.push_back(new_item, thd->mem_root))
24829       return 1;
24830     ref_pointer_array[((i < border)? all_fields.elements-i-1 : i-border)]=
24831       new_item;
24832   }
24833 
24834   List_iterator_fast<Item> itr(res_all_fields);
24835   for (i= 0; i < border; i++)
24836     itr++;
24837   itr.sublist(res_selected_fields, elements);
24838 
24839   return thd->is_fatal_error;
24840 }
24841 
24842 
24843 
24844 /******************************************************************************
24845   Code for calculating functions
24846 ******************************************************************************/
24847 
24848 
24849 /**
24850   Call ::setup for all sum functions.
24851 
24852   @param thd           thread handler
24853   @param func_ptr      sum function list
24854 
24855   @retval
24856     FALSE  ok
24857   @retval
24858     TRUE   error
24859 */
24860 
setup_sum_funcs(THD * thd,Item_sum ** func_ptr)24861 static bool setup_sum_funcs(THD *thd, Item_sum **func_ptr)
24862 {
24863   Item_sum *func;
24864   DBUG_ENTER("setup_sum_funcs");
24865   while ((func= *(func_ptr++)))
24866   {
24867     if (func->aggregator_setup(thd))
24868       DBUG_RETURN(TRUE);
24869   }
24870   DBUG_RETURN(FALSE);
24871 }
24872 
24873 
prepare_sum_aggregators(Item_sum ** func_ptr,bool need_distinct)24874 static bool prepare_sum_aggregators(Item_sum **func_ptr, bool need_distinct)
24875 {
24876   Item_sum *func;
24877   DBUG_ENTER("prepare_sum_aggregators");
24878   while ((func= *(func_ptr++)))
24879   {
24880     if (func->set_aggregator(need_distinct && func->has_with_distinct() ?
24881                              Aggregator::DISTINCT_AGGREGATOR :
24882                              Aggregator::SIMPLE_AGGREGATOR))
24883       DBUG_RETURN(TRUE);
24884   }
24885   DBUG_RETURN(FALSE);
24886 }
24887 
24888 
24889 static void
init_tmptable_sum_functions(Item_sum ** func_ptr)24890 init_tmptable_sum_functions(Item_sum **func_ptr)
24891 {
24892   Item_sum *func;
24893   while ((func= *(func_ptr++)))
24894     func->reset_field();
24895 }
24896 
24897 
24898 /** Update record 0 in tmp_table from record 1. */
24899 
24900 static void
update_tmptable_sum_func(Item_sum ** func_ptr,TABLE * tmp_table)24901 update_tmptable_sum_func(Item_sum **func_ptr,
24902 			 TABLE *tmp_table __attribute__((unused)))
24903 {
24904   Item_sum *func;
24905   while ((func= *(func_ptr++)))
24906     func->update_field();
24907 }
24908 
24909 
24910 /** Copy result of sum functions to record in tmp_table. */
24911 
24912 static void
copy_sum_funcs(Item_sum ** func_ptr,Item_sum ** end_ptr)24913 copy_sum_funcs(Item_sum **func_ptr, Item_sum **end_ptr)
24914 {
24915   for (; func_ptr != end_ptr ; func_ptr++)
24916     (void) (*func_ptr)->save_in_result_field(1);
24917   return;
24918 }
24919 
24920 
24921 static bool
init_sum_functions(Item_sum ** func_ptr,Item_sum ** end_ptr)24922 init_sum_functions(Item_sum **func_ptr, Item_sum **end_ptr)
24923 {
24924   for (; func_ptr != end_ptr ;func_ptr++)
24925   {
24926     if ((*func_ptr)->reset_and_add())
24927       return 1;
24928   }
24929   /* If rollup, calculate the upper sum levels */
24930   for ( ; *func_ptr ; func_ptr++)
24931   {
24932     if ((*func_ptr)->aggregator_add())
24933       return 1;
24934   }
24935   return 0;
24936 }
24937 
24938 
24939 static bool
update_sum_func(Item_sum ** func_ptr)24940 update_sum_func(Item_sum **func_ptr)
24941 {
24942   Item_sum *func;
24943   for (; (func= (Item_sum*) *func_ptr) ; func_ptr++)
24944     if (func->aggregator_add())
24945       return 1;
24946   return 0;
24947 }
24948 
24949 /**
24950   Copy result of functions to record in tmp_table.
24951 
24952   Uses the thread pointer to check for errors in
24953   some of the val_xxx() methods called by the
24954   save_in_result_field() function.
24955   TODO: make the Item::val_xxx() return error code
24956 
24957   @param func_ptr  array of the function Items to copy to the tmp table
24958   @param thd       pointer to the current thread for error checking
24959   @retval
24960     FALSE if OK
24961   @retval
24962     TRUE on error
24963 */
24964 
24965 bool
copy_funcs(Item ** func_ptr,const THD * thd)24966 copy_funcs(Item **func_ptr, const THD *thd)
24967 {
24968   Item *func;
24969   for (; (func = *func_ptr) ; func_ptr++)
24970   {
24971     if (func->type() == Item::FUNC_ITEM &&
24972         ((Item_func *) func)->with_window_func)
24973       continue;
24974     func->save_in_result_field(1);
24975     /*
24976       Need to check the THD error state because Item::val_xxx() don't
24977       return error code, but can generate errors
24978       TODO: change it for a real status check when Item::val_xxx()
24979       are extended to return status code.
24980     */
24981     if (unlikely(thd->is_error()))
24982       return TRUE;
24983   }
24984   return FALSE;
24985 }
24986 
24987 
24988 /**
24989   Create a condition for a const reference and add this to the
24990   currenct select for the table.
24991 */
24992 
add_ref_to_table_cond(THD * thd,JOIN_TAB * join_tab)24993 static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab)
24994 {
24995   DBUG_ENTER("add_ref_to_table_cond");
24996   if (!join_tab->ref.key_parts)
24997     DBUG_RETURN(FALSE);
24998 
24999   Item_cond_and *cond= new (thd->mem_root) Item_cond_and(thd);
25000   TABLE *table=join_tab->table;
25001   int error= 0;
25002   if (!cond)
25003     DBUG_RETURN(TRUE);
25004 
25005   for (uint i=0 ; i < join_tab->ref.key_parts ; i++)
25006   {
25007     Field *field=table->field[table->key_info[join_tab->ref.key].key_part[i].
25008 			      fieldnr-1];
25009     Item *value=join_tab->ref.items[i];
25010     cond->add(new (thd->mem_root)
25011               Item_func_equal(thd, new (thd->mem_root) Item_field(thd, field),
25012                               value),
25013               thd->mem_root);
25014   }
25015   if (unlikely(thd->is_fatal_error))
25016     DBUG_RETURN(TRUE);
25017   if (!cond->fixed)
25018   {
25019     Item *tmp_item= (Item*) cond;
25020     cond->fix_fields(thd, &tmp_item);
25021     DBUG_ASSERT(cond == tmp_item);
25022   }
25023   if (join_tab->select)
25024   {
25025     Item *UNINIT_VAR(cond_copy);
25026     if (join_tab->select->pre_idx_push_select_cond)
25027       cond_copy= cond->copy_andor_structure(thd);
25028     if (join_tab->select->cond)
25029       error=(int) cond->add(join_tab->select->cond, thd->mem_root);
25030     join_tab->select->cond= cond;
25031     if (join_tab->select->pre_idx_push_select_cond)
25032     {
25033       Item *new_cond= and_conds(thd, cond_copy,
25034                                 join_tab->select->pre_idx_push_select_cond);
25035       if (new_cond->fix_fields_if_needed(thd, &new_cond))
25036         error= 1;
25037       join_tab->pre_idx_push_select_cond=
25038         join_tab->select->pre_idx_push_select_cond= new_cond;
25039     }
25040     join_tab->set_select_cond(cond, __LINE__);
25041   }
25042   else if ((join_tab->select= make_select(join_tab->table, 0, 0, cond,
25043                                           (SORT_INFO*) 0, 0, &error)))
25044     join_tab->set_select_cond(cond, __LINE__);
25045 
25046   DBUG_RETURN(error ? TRUE : FALSE);
25047 }
25048 
25049 
25050 /**
25051   Free joins of subselect of this select.
25052 
25053   @param thd      THD pointer
25054   @param select   pointer to st_select_lex which subselects joins we will free
25055 */
25056 
free_underlaid_joins(THD * thd,SELECT_LEX * select)25057 void free_underlaid_joins(THD *thd, SELECT_LEX *select)
25058 {
25059   for (SELECT_LEX_UNIT *unit= select->first_inner_unit();
25060        unit;
25061        unit= unit->next_unit())
25062     unit->cleanup();
25063 }
25064 
25065 /****************************************************************************
25066   ROLLUP handling
25067 ****************************************************************************/
25068 
25069 /**
25070   Replace occurences of group by fields in an expression by ref items.
25071 
25072   The function replaces occurrences of group by fields in expr
25073   by ref objects for these fields unless they are under aggregate
25074   functions.
25075   The function also corrects value of the the maybe_null attribute
25076   for the items of all subexpressions containing group by fields.
25077 
25078   @b EXAMPLES
25079     @code
25080       SELECT a+1 FROM t1 GROUP BY a WITH ROLLUP
25081       SELECT SUM(a)+a FROM t1 GROUP BY a WITH ROLLUP
25082   @endcode
25083 
25084   @b IMPLEMENTATION
25085 
25086     The function recursively traverses the tree of the expr expression,
25087     looks for occurrences of the group by fields that are not under
25088     aggregate functions and replaces them for the corresponding ref items.
25089 
25090   @note
25091     This substitution is needed GROUP BY queries with ROLLUP if
25092     SELECT list contains expressions over group by attributes.
25093 
25094   @param thd                  reference to the context
25095   @param expr                 expression to make replacement
25096   @param group_list           list of references to group by items
25097   @param changed        out:  returns 1 if item contains a replaced field item
25098 
25099   @todo
25100     - TODO: Some functions are not null-preserving. For those functions
25101     updating of the maybe_null attribute is an overkill.
25102 
25103   @retval
25104     0	if ok
25105   @retval
25106     1   on error
25107 */
25108 
change_group_ref(THD * thd,Item_func * expr,ORDER * group_list,bool * changed)25109 static bool change_group_ref(THD *thd, Item_func *expr, ORDER *group_list,
25110                              bool *changed)
25111 {
25112   if (expr->argument_count())
25113   {
25114     Name_resolution_context *context= &thd->lex->current_select->context;
25115     Item **arg,**arg_end;
25116     bool arg_changed= FALSE;
25117     for (arg= expr->arguments(),
25118          arg_end= expr->arguments() + expr->argument_count();
25119          arg != arg_end; arg++)
25120     {
25121       Item *item= *arg;
25122       if (item->type() == Item::FIELD_ITEM || item->type() == Item::REF_ITEM)
25123       {
25124         ORDER *group_tmp;
25125         for (group_tmp= group_list; group_tmp; group_tmp= group_tmp->next)
25126         {
25127           if (item->eq(*group_tmp->item,0))
25128           {
25129             Item *new_item;
25130             if (!(new_item= new (thd->mem_root) Item_ref(thd, context,
25131                                                          group_tmp->item, 0,
25132                                                          &item->name)))
25133               return 1;                                 // fatal_error is set
25134             thd->change_item_tree(arg, new_item);
25135             arg_changed= TRUE;
25136           }
25137         }
25138       }
25139       else if (item->type() == Item::FUNC_ITEM)
25140       {
25141         if (change_group_ref(thd, (Item_func *) item, group_list, &arg_changed))
25142           return 1;
25143       }
25144     }
25145     if (arg_changed)
25146     {
25147       expr->maybe_null= 1;
25148       expr->in_rollup= 1;
25149       *changed= TRUE;
25150     }
25151   }
25152   return 0;
25153 }
25154 
25155 
25156 /** Allocate memory needed for other rollup functions. */
25157 
rollup_init()25158 bool JOIN::rollup_init()
25159 {
25160   uint i,j;
25161   Item **ref_array;
25162 
25163   tmp_table_param.quick_group= 0;	// Can't create groups in tmp table
25164   rollup.state= ROLLUP::STATE_INITED;
25165 
25166   /*
25167     Create pointers to the different sum function groups
25168     These are updated by rollup_make_fields()
25169   */
25170   tmp_table_param.group_parts= send_group_parts;
25171 
25172   Item_null_result **null_items=
25173     static_cast<Item_null_result**>(thd->alloc(sizeof(Item*)*send_group_parts));
25174 
25175   rollup.null_items= Item_null_array(null_items, send_group_parts);
25176   rollup.ref_pointer_arrays=
25177     static_cast<Ref_ptr_array*>
25178     (thd->alloc((sizeof(Ref_ptr_array) +
25179                  all_fields.elements * sizeof(Item*)) * send_group_parts));
25180   rollup.fields=
25181     static_cast<List<Item>*>(thd->alloc(sizeof(List<Item>) * send_group_parts));
25182 
25183   if (!null_items || !rollup.ref_pointer_arrays || !rollup.fields)
25184     return true;
25185 
25186   ref_array= (Item**) (rollup.ref_pointer_arrays+send_group_parts);
25187 
25188 
25189   /*
25190     Prepare space for field list for the different levels
25191     These will be filled up in rollup_make_fields()
25192   */
25193   for (i= 0 ; i < send_group_parts ; i++)
25194   {
25195     if (!(rollup.null_items[i]= new (thd->mem_root) Item_null_result(thd)))
25196       return true;
25197 
25198     List<Item> *rollup_fields= &rollup.fields[i];
25199     rollup_fields->empty();
25200     rollup.ref_pointer_arrays[i]= Ref_ptr_array(ref_array, all_fields.elements);
25201     ref_array+= all_fields.elements;
25202   }
25203   for (i= 0 ; i < send_group_parts; i++)
25204   {
25205     for (j=0 ; j < fields_list.elements ; j++)
25206       rollup.fields[i].push_back(rollup.null_items[i], thd->mem_root);
25207   }
25208   List_iterator<Item> it(all_fields);
25209   Item *item;
25210   while ((item= it++))
25211   {
25212     ORDER *group_tmp;
25213     bool found_in_group= 0;
25214 
25215     for (group_tmp= group_list; group_tmp; group_tmp= group_tmp->next)
25216     {
25217       if (*group_tmp->item == item)
25218       {
25219         item->maybe_null= 1;
25220         item->in_rollup= 1;
25221         found_in_group= 1;
25222         break;
25223       }
25224     }
25225     if (item->type() == Item::FUNC_ITEM && !found_in_group)
25226     {
25227       bool changed= FALSE;
25228       if (change_group_ref(thd, (Item_func *) item, group_list, &changed))
25229         return 1;
25230       /*
25231         We have to prevent creation of a field in a temporary table for
25232         an expression that contains GROUP BY attributes.
25233         Marking the expression item as 'with_sum_func' will ensure this.
25234       */
25235       if (changed)
25236         item->with_sum_func= 1;
25237     }
25238   }
25239   return 0;
25240 }
25241 
25242 /**
25243    Wrap all constant Items in GROUP BY list.
25244 
25245    For ROLLUP queries each constant item referenced in GROUP BY list
25246    is wrapped up into an Item_func object yielding the same value
25247    as the constant item. The objects of the wrapper class are never
25248    considered as constant items and besides they inherit all
25249    properties of the Item_result_field class.
25250    This wrapping allows us to ensure writing constant items
25251    into temporary tables whenever the result of the ROLLUP
25252    operation has to be written into a temporary table, e.g. when
25253    ROLLUP is used together with DISTINCT in the SELECT list.
25254    Usually when creating temporary tables for a intermidiate
25255    result we do not include fields for constant expressions.
25256 
25257    @retval
25258      0  if ok
25259    @retval
25260      1  on error
25261 */
25262 
rollup_process_const_fields()25263 bool JOIN::rollup_process_const_fields()
25264 {
25265   ORDER *group_tmp;
25266   Item *item;
25267   List_iterator<Item> it(all_fields);
25268 
25269   for (group_tmp= group_list; group_tmp; group_tmp= group_tmp->next)
25270   {
25271     if (!(*group_tmp->item)->const_item())
25272       continue;
25273     while ((item= it++))
25274     {
25275       if (*group_tmp->item == item)
25276       {
25277         Item* new_item= new (thd->mem_root) Item_func_rollup_const(thd, item);
25278         if (!new_item)
25279           return 1;
25280         new_item->fix_fields(thd, (Item **) 0);
25281         thd->change_item_tree(it.ref(), new_item);
25282         for (ORDER *tmp= group_tmp; tmp; tmp= tmp->next)
25283         {
25284           if (*tmp->item == item)
25285             thd->change_item_tree(tmp->item, new_item);
25286         }
25287         break;
25288       }
25289     }
25290     it.rewind();
25291   }
25292   return 0;
25293 }
25294 
25295 
25296 /**
25297   Fill up rollup structures with pointers to fields to use.
25298 
25299   Creates copies of item_sum items for each sum level.
25300 
25301   @param fields_arg		List of all fields (hidden and real ones)
25302   @param sel_fields		Pointer to selected fields
25303   @param func			Store here a pointer to all fields
25304 
25305   @retval
25306     0	if ok;
25307     In this case func is pointing to next not used element.
25308   @retval
25309     1    on error
25310 */
25311 
rollup_make_fields(List<Item> & fields_arg,List<Item> & sel_fields,Item_sum *** func)25312 bool JOIN::rollup_make_fields(List<Item> &fields_arg, List<Item> &sel_fields,
25313 			      Item_sum ***func)
25314 {
25315   List_iterator_fast<Item> it(fields_arg);
25316   Item *first_field= sel_fields.head();
25317   uint level;
25318 
25319   /*
25320     Create field lists for the different levels
25321 
25322     The idea here is to have a separate field list for each rollup level to
25323     avoid all runtime checks of which columns should be NULL.
25324 
25325     The list is stored in reverse order to get sum function in such an order
25326     in func that it makes it easy to reset them with init_sum_functions()
25327 
25328     Assuming:  SELECT a, b, c SUM(b) FROM t1 GROUP BY a,b WITH ROLLUP
25329 
25330     rollup.fields[0] will contain list where a,b,c is NULL
25331     rollup.fields[1] will contain list where b,c is NULL
25332     ...
25333     rollup.ref_pointer_array[#] points to fields for rollup.fields[#]
25334     ...
25335     sum_funcs_end[0] points to all sum functions
25336     sum_funcs_end[1] points to all sum functions, except grand totals
25337     ...
25338   */
25339 
25340   for (level=0 ; level < send_group_parts ; level++)
25341   {
25342     uint i;
25343     uint pos= send_group_parts - level -1;
25344     bool real_fields= 0;
25345     Item *item;
25346     List_iterator<Item> new_it(rollup.fields[pos]);
25347     Ref_ptr_array ref_array_start= rollup.ref_pointer_arrays[pos];
25348     ORDER *start_group;
25349 
25350     /* Point to first hidden field */
25351     uint ref_array_ix= fields_arg.elements-1;
25352 
25353 
25354     /* Remember where the sum functions ends for the previous level */
25355     sum_funcs_end[pos+1]= *func;
25356 
25357     /* Find the start of the group for this level */
25358     for (i= 0, start_group= group_list ;
25359 	 i++ < pos ;
25360 	 start_group= start_group->next)
25361       ;
25362 
25363     it.rewind();
25364     while ((item= it++))
25365     {
25366       if (item == first_field)
25367       {
25368 	real_fields= 1;				// End of hidden fields
25369 	ref_array_ix= 0;
25370       }
25371 
25372       if (item->type() == Item::SUM_FUNC_ITEM && !item->const_item() &&
25373           (!((Item_sum*) item)->depended_from() ||
25374            ((Item_sum *)item)->depended_from() == select_lex))
25375 
25376       {
25377 	/*
25378 	  This is a top level summary function that must be replaced with
25379 	  a sum function that is reset for this level.
25380 
25381 	  NOTE: This code creates an object which is not that nice in a
25382 	  sub select.  Fortunately it's not common to have rollup in
25383 	  sub selects.
25384 	*/
25385 	item= item->copy_or_same(thd);
25386 	((Item_sum*) item)->make_unique();
25387 	*(*func)= (Item_sum*) item;
25388 	(*func)++;
25389       }
25390       else
25391       {
25392 	/* Check if this is something that is part of this group by */
25393 	ORDER *group_tmp;
25394 	for (group_tmp= start_group, i= pos ;
25395              group_tmp ; group_tmp= group_tmp->next, i++)
25396 	{
25397           if (*group_tmp->item == item)
25398 	  {
25399 	    /*
25400 	      This is an element that is used by the GROUP BY and should be
25401 	      set to NULL in this level
25402 	    */
25403             Item_null_result *null_item= new (thd->mem_root) Item_null_result(thd);
25404             if (!null_item)
25405               return 1;
25406 	    item->maybe_null= 1;		// Value will be null sometimes
25407             null_item->result_field= item->get_tmp_table_field();
25408             item= null_item;
25409 	    break;
25410 	  }
25411 	}
25412       }
25413       ref_array_start[ref_array_ix]= item;
25414       if (real_fields)
25415       {
25416 	(void) new_it++;			// Point to next item
25417 	new_it.replace(item);			// Replace previous
25418 	ref_array_ix++;
25419       }
25420       else
25421 	ref_array_ix--;
25422     }
25423   }
25424   sum_funcs_end[0]= *func;			// Point to last function
25425   return 0;
25426 }
25427 
25428 /**
25429   Send all rollup levels higher than the current one to the client.
25430 
25431   @b SAMPLE
25432     @code
25433       SELECT a, b, c SUM(b) FROM t1 GROUP BY a,b WITH ROLLUP
25434   @endcode
25435 
25436   @param idx		Level we are on:
25437                         - 0 = Total sum level
25438                         - 1 = First group changed  (a)
25439                         - 2 = Second group changed (a,b)
25440 
25441   @retval
25442     0   ok
25443   @retval
25444     1   If send_data_failed()
25445 */
25446 
rollup_send_data(uint idx)25447 int JOIN::rollup_send_data(uint idx)
25448 {
25449   uint i;
25450   for (i= send_group_parts ; i-- > idx ; )
25451   {
25452     int res= 0;
25453     /* Get reference pointers to sum functions in place */
25454     copy_ref_ptr_array(ref_ptrs, rollup.ref_pointer_arrays[i]);
25455     if ((!having || having->val_int()))
25456     {
25457       if (send_records < unit->select_limit_cnt && do_send_rows &&
25458 	  (res= result->send_data(rollup.fields[i])) > 0)
25459 	return 1;
25460       if (!res)
25461         send_records++;
25462     }
25463   }
25464   /* Restore ref_pointer_array */
25465   set_items_ref_array(current_ref_ptrs);
25466   return 0;
25467 }
25468 
25469 /**
25470   Write all rollup levels higher than the current one to a temp table.
25471 
25472   @b SAMPLE
25473     @code
25474       SELECT a, b, SUM(c) FROM t1 GROUP BY a,b WITH ROLLUP
25475   @endcode
25476 
25477   @param idx                 Level we are on:
25478                                - 0 = Total sum level
25479                                - 1 = First group changed  (a)
25480                                - 2 = Second group changed (a,b)
25481   @param table               reference to temp table
25482 
25483   @retval
25484     0   ok
25485   @retval
25486     1   if write_data_failed()
25487 */
25488 
rollup_write_data(uint idx,TMP_TABLE_PARAM * tmp_table_param_arg,TABLE * table_arg)25489 int JOIN::rollup_write_data(uint idx, TMP_TABLE_PARAM *tmp_table_param_arg, TABLE *table_arg)
25490 {
25491   uint i;
25492   for (i= send_group_parts ; i-- > idx ; )
25493   {
25494     /* Get reference pointers to sum functions in place */
25495     copy_ref_ptr_array(ref_ptrs, rollup.ref_pointer_arrays[i]);
25496     if ((!having || having->val_int()))
25497     {
25498       int write_error;
25499       Item *item;
25500       List_iterator_fast<Item> it(rollup.fields[i]);
25501       while ((item= it++))
25502       {
25503         if (item->type() == Item::NULL_ITEM && item->is_result_field())
25504           item->save_in_result_field(1);
25505       }
25506       copy_sum_funcs(sum_funcs_end[i+1], sum_funcs_end[i]);
25507       if (unlikely((write_error=
25508                     table_arg->file->ha_write_tmp_row(table_arg->record[0]))))
25509       {
25510 	if (create_internal_tmp_table_from_heap(thd, table_arg,
25511                                                 tmp_table_param_arg->start_recinfo,
25512                                                 &tmp_table_param_arg->recinfo,
25513                                                 write_error, 0, NULL))
25514 	  return 1;
25515       }
25516     }
25517   }
25518   /* Restore ref_pointer_array */
25519   set_items_ref_array(current_ref_ptrs);
25520   return 0;
25521 }
25522 
25523 /**
25524   clear results if there are not rows found for group
25525   (end_send_group/end_write_group)
25526 */
25527 
clear()25528 void JOIN::clear()
25529 {
25530   clear_tables(this, 0);
25531   copy_fields(&tmp_table_param);
25532 
25533   if (sum_funcs)
25534   {
25535     Item_sum *func, **func_ptr= sum_funcs;
25536     while ((func= *(func_ptr++)))
25537       func->clear();
25538   }
25539 }
25540 
25541 
25542 /**
25543   Print an EXPLAIN line with all NULLs and given message in the 'Extra' column
25544 
25545   @retval
25546     0  ok
25547     1  OOM error or error from send_data()
25548 */
25549 
print_explain_message_line(select_result_sink * result,uint8 options,bool is_analyze,uint select_number,const char * select_type,ha_rows * rows,const char * message)25550 int print_explain_message_line(select_result_sink *result,
25551                                uint8 options, bool is_analyze,
25552                                uint select_number,
25553                                const char *select_type,
25554                                ha_rows *rows,
25555                                const char *message)
25556 {
25557   THD *thd= result->thd;
25558   MEM_ROOT *mem_root= thd->mem_root;
25559   Item *item_null= new (mem_root) Item_null(thd);
25560   List<Item> item_list;
25561 
25562   item_list.push_back(new (mem_root) Item_int(thd, (int32) select_number),
25563                       mem_root);
25564   item_list.push_back(new (mem_root) Item_string_sys(thd, select_type),
25565                       mem_root);
25566   /* `table` */
25567   item_list.push_back(item_null, mem_root);
25568 
25569   /* `partitions` */
25570   if (options & DESCRIBE_PARTITIONS)
25571     item_list.push_back(item_null, mem_root);
25572 
25573   /* type, possible_keys, key, key_len, ref */
25574   for (uint i=0 ; i < 5; i++)
25575     item_list.push_back(item_null, mem_root);
25576 
25577   /* `rows` */
25578   if (rows)
25579   {
25580     item_list.push_back(new (mem_root) Item_int(thd, *rows,
25581                                      MY_INT64_NUM_DECIMAL_DIGITS),
25582                         mem_root);
25583   }
25584   else
25585     item_list.push_back(item_null, mem_root);
25586 
25587   /* `r_rows` */
25588   if (is_analyze)
25589     item_list.push_back(item_null, mem_root);
25590 
25591   /* `filtered` */
25592   if (is_analyze || options & DESCRIBE_EXTENDED)
25593     item_list.push_back(item_null, mem_root);
25594 
25595   /* `r_filtered` */
25596   if (is_analyze)
25597     item_list.push_back(item_null, mem_root);
25598 
25599   /* `Extra` */
25600   if (message)
25601     item_list.push_back(new (mem_root) Item_string_sys(thd, message),
25602                         mem_root);
25603   else
25604     item_list.push_back(item_null, mem_root);
25605 
25606   if (unlikely(thd->is_fatal_error) || unlikely(result->send_data(item_list)))
25607     return 1;
25608   return 0;
25609 }
25610 
25611 
25612 /*
25613   Append MRR information from quick select to the given string
25614 */
25615 
explain_append_mrr_info(QUICK_RANGE_SELECT * quick,String * res)25616 void explain_append_mrr_info(QUICK_RANGE_SELECT *quick, String *res)
25617 {
25618   char mrr_str_buf[128];
25619   mrr_str_buf[0]=0;
25620   int len;
25621   handler *h= quick->head->file;
25622   len= h->multi_range_read_explain_info(quick->mrr_flags, mrr_str_buf,
25623                                         sizeof(mrr_str_buf));
25624   if (len > 0)
25625   {
25626     //res->append(STRING_WITH_LEN("; "));
25627     res->append(mrr_str_buf, len);
25628   }
25629 }
25630 
25631 
25632 ///////////////////////////////////////////////////////////////////////////////
append_possible_keys(MEM_ROOT * alloc,String_list & list,TABLE * table,key_map possible_keys)25633 int append_possible_keys(MEM_ROOT *alloc, String_list &list, TABLE *table,
25634                          key_map possible_keys)
25635 {
25636   uint j;
25637   for (j=0 ; j < table->s->keys ; j++)
25638   {
25639     if (possible_keys.is_set(j))
25640       if (!(list.append_str(alloc, table->key_info[j].name.str)))
25641         return 1;
25642   }
25643   return 0;
25644 }
25645 
25646 
save_explain_data(Explain_table_access * eta,table_map prefix_tables,bool distinct_arg,JOIN_TAB * first_top_tab)25647 bool JOIN_TAB::save_explain_data(Explain_table_access *eta,
25648                                  table_map prefix_tables,
25649                                  bool distinct_arg, JOIN_TAB *first_top_tab)
25650 {
25651   int quick_type;
25652   CHARSET_INFO *cs= system_charset_info;
25653   THD *thd=      join->thd;
25654   TABLE_LIST *table_list= table->pos_in_table_list;
25655   QUICK_SELECT_I *cur_quick= NULL;
25656   my_bool key_read;
25657   char table_name_buffer[SAFE_NAME_LEN];
25658   KEY *key_info= 0;
25659   uint key_len= 0;
25660   quick_type= -1;
25661 
25662   explain_plan= eta;
25663   eta->key.clear();
25664   eta->quick_info= NULL;
25665 
25666   SQL_SELECT *tab_select;
25667   /*
25668     We assume that if this table does pre-sorting, then it doesn't do filtering
25669     with SQL_SELECT.
25670   */
25671   DBUG_ASSERT(!(select && filesort));
25672   tab_select= (filesort)? filesort->select : select;
25673 
25674   if (filesort)
25675   {
25676     if (!(eta->pre_join_sort=
25677           new (thd->mem_root) Explain_aggr_filesort(thd->mem_root,
25678                                                     thd->lex->analyze_stmt,
25679                                                     filesort)))
25680       return 1;
25681   }
25682 
25683   tracker= &eta->tracker;
25684   jbuf_tracker= &eta->jbuf_tracker;
25685 
25686   /* Enable the table access time tracker only for "ANALYZE stmt" */
25687   if (thd->lex->analyze_stmt)
25688     table->file->set_time_tracker(&eta->op_tracker);
25689 
25690   /* No need to save id and select_type here, they are kept in Explain_select */
25691 
25692   /* table */
25693   if (table->derived_select_number)
25694   {
25695     /* Derived table name generation */
25696     size_t len= my_snprintf(table_name_buffer, sizeof(table_name_buffer)-1,
25697                          "<derived%u>",
25698                          table->derived_select_number);
25699     eta->table_name.copy(table_name_buffer, len, cs);
25700   }
25701   else if (bush_children)
25702   {
25703     JOIN_TAB *ctab= bush_children->start;
25704     /* table */
25705     size_t len= my_snprintf(table_name_buffer,
25706                          sizeof(table_name_buffer)-1,
25707                          "<subquery%d>",
25708                          ctab->emb_sj_nest->sj_subq_pred->get_identifier());
25709     eta->table_name.copy(table_name_buffer, len, cs);
25710   }
25711   else
25712   {
25713     TABLE_LIST *real_table= table->pos_in_table_list;
25714     /*
25715       When multi-table UPDATE/DELETE does updates/deletes to a VIEW, the view
25716       is merged in a certain particular way (grep for DT_MERGE_FOR_INSERT).
25717 
25718       As a result, view's underlying tables have $tbl->pos_in_table_list={view}.
25719       We don't want to print view name in EXPLAIN, we want underlying table's
25720       alias (like specified in the view definition).
25721     */
25722     if (real_table->merged_for_insert)
25723     {
25724       TABLE_LIST *view_child= real_table->view->select_lex.table_list.first;
25725       for (;view_child; view_child= view_child->next_local)
25726       {
25727         if (view_child->table == table)
25728         {
25729           real_table= view_child;
25730           break;
25731         }
25732       }
25733     }
25734     eta->table_name.copy(real_table->alias.str, real_table->alias.length, cs);
25735   }
25736 
25737   /* "partitions" column */
25738   {
25739 #ifdef WITH_PARTITION_STORAGE_ENGINE
25740     partition_info *part_info;
25741     if (!table->derived_select_number &&
25742         (part_info= table->part_info))
25743     { //TODO: all thd->mem_root here should be fixed
25744       make_used_partitions_str(thd->mem_root, part_info, &eta->used_partitions,
25745                                eta->used_partitions_list);
25746       eta->used_partitions_set= true;
25747     }
25748     else
25749       eta->used_partitions_set= false;
25750 #else
25751     /* just produce empty column if partitioning is not compiled in */
25752     eta->used_partitions_set= false;
25753 #endif
25754   }
25755 
25756   /* "type" column */
25757   enum join_type tab_type= type;
25758   if ((type == JT_ALL || type == JT_HASH) &&
25759        tab_select && tab_select->quick && use_quick != 2)
25760   {
25761     cur_quick= tab_select->quick;
25762     quick_type= cur_quick->get_type();
25763     if ((quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) ||
25764         (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT) ||
25765         (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) ||
25766         (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION))
25767       tab_type= type == JT_ALL ? JT_INDEX_MERGE : JT_HASH_INDEX_MERGE;
25768     else
25769       tab_type= type == JT_ALL ? JT_RANGE : JT_HASH_RANGE;
25770   }
25771   eta->type= tab_type;
25772 
25773   /* Build "possible_keys" value */
25774   // psergey-todo: why does this use thd MEM_ROOT??? Doesn't this
25775   // break ANALYZE ? thd->mem_root will be freed, and after that we will
25776   // attempt to print the query plan?
25777   if (append_possible_keys(thd->mem_root, eta->possible_keys, table, keys))
25778     return 1;
25779   // psergey-todo: ^ check for error return code
25780 
25781   /* Build "key", "key_len", and "ref" */
25782   if (tab_type == JT_NEXT)
25783   {
25784     key_info= table->key_info+index;
25785     key_len= key_info->key_length;
25786   }
25787   else if (ref.key_parts)
25788   {
25789     key_info= get_keyinfo_by_key_no(ref.key);
25790     key_len= ref.key_length;
25791   }
25792 
25793   /*
25794     In STRAIGHT_JOIN queries, there can be join tabs with JT_CONST type
25795     that still have quick selects.
25796   */
25797   if (tab_select && tab_select->quick && tab_type != JT_CONST)
25798   {
25799     if (!(eta->quick_info= tab_select->quick->get_explain(thd->mem_root)))
25800       return 1;
25801   }
25802 
25803   if (key_info) /* 'index' or 'ref' access */
25804   {
25805     eta->key.set(thd->mem_root, key_info, key_len);
25806 
25807     if (ref.key_parts && tab_type != JT_FT)
25808     {
25809       store_key **key_ref= ref.key_copy;
25810       for (uint kp= 0; kp < ref.key_parts; kp++)
25811       {
25812         if ((key_part_map(1) << kp) & ref.const_ref_part_map)
25813         {
25814           if (!(eta->ref_list.append_str(thd->mem_root, "const")))
25815             return 1;
25816           /*
25817             create_ref_for_key() handles keypart=const equalities as follows:
25818               - non-EXPLAIN execution will copy the "const" to lookup tuple
25819                 immediately and will not add an element to ref.key_copy
25820               - EXPLAIN will put an element into ref.key_copy. Since we've
25821                 just printed "const" for it, we should skip it here
25822           */
25823           if (thd->lex->describe)
25824             key_ref++;
25825         }
25826         else
25827         {
25828           if (!(eta->ref_list.append_str(thd->mem_root, (*key_ref)->name())))
25829             return 1;
25830           key_ref++;
25831         }
25832       }
25833     }
25834   }
25835 
25836   if (tab_type == JT_HASH_NEXT) /* full index scan + hash join */
25837   {
25838     eta->hash_next_key.set(thd->mem_root,
25839                            & table->key_info[index],
25840                            table->key_info[index].key_length);
25841     // psergey-todo: ^ is the above correct? are we necessarily joining on all
25842     // columns?
25843   }
25844 
25845   if (!key_info)
25846   {
25847     if (table_list && /* SJM bushes don't have table_list */
25848         table_list->schema_table &&
25849         table_list->schema_table->i_s_requested_object & OPTIMIZE_I_S_TABLE)
25850     {
25851       IS_table_read_plan *is_table_read_plan= table_list->is_table_read_plan;
25852       const char *tmp_buff;
25853       int f_idx;
25854       StringBuffer<64> key_name_buf;
25855       if (is_table_read_plan->trivial_show_command ||
25856           is_table_read_plan->has_db_lookup_value())
25857       {
25858         /* The "key" has the name of the column referring to the database */
25859         f_idx= table_list->schema_table->idx_field1;
25860         tmp_buff= table_list->schema_table->fields_info[f_idx].field_name;
25861         key_name_buf.append(tmp_buff, strlen(tmp_buff), cs);
25862       }
25863       if (is_table_read_plan->trivial_show_command ||
25864           is_table_read_plan->has_table_lookup_value())
25865       {
25866         if (is_table_read_plan->trivial_show_command ||
25867             is_table_read_plan->has_db_lookup_value())
25868           key_name_buf.append(',');
25869 
25870         f_idx= table_list->schema_table->idx_field2;
25871         tmp_buff= table_list->schema_table->fields_info[f_idx].field_name;
25872         key_name_buf.append(tmp_buff, strlen(tmp_buff), cs);
25873       }
25874 
25875       if (key_name_buf.length())
25876         eta->key.set_pseudo_key(thd->mem_root, key_name_buf.c_ptr_safe());
25877     }
25878   }
25879 
25880   /* "rows" */
25881   if (table_list /* SJM bushes don't have table_list */ &&
25882       table_list->schema_table)
25883   {
25884     /* I_S tables have rows=extra=NULL */
25885     eta->rows_set= false;
25886     eta->filtered_set= false;
25887   }
25888   else
25889   {
25890     ha_rows examined_rows= get_examined_rows();
25891 
25892     eta->rows_set= true;
25893     eta->rows= examined_rows;
25894 
25895     /* "filtered"  */
25896     float f= 0.0;
25897     if (examined_rows)
25898     {
25899       double pushdown_cond_selectivity= cond_selectivity;
25900       if (pushdown_cond_selectivity == 1.0)
25901         f= (float) (100.0 * records_read / examined_rows);
25902       else
25903         f= (float) (100.0 * pushdown_cond_selectivity);
25904     }
25905     set_if_smaller(f, 100.0);
25906     eta->filtered_set= true;
25907     eta->filtered= f;
25908   }
25909 
25910   /* Build "Extra" field and save it */
25911   key_read= table->file->keyread_enabled();
25912   if ((tab_type == JT_NEXT || tab_type == JT_CONST) &&
25913       table->covering_keys.is_set(index))
25914     key_read=1;
25915   if (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT &&
25916       !((QUICK_ROR_INTERSECT_SELECT*)cur_quick)->need_to_fetch_row)
25917     key_read=1;
25918 
25919   if (info)
25920   {
25921     eta->push_extra(info);
25922   }
25923   else if (packed_info & TAB_INFO_HAVE_VALUE)
25924   {
25925     if (packed_info & TAB_INFO_USING_INDEX)
25926       eta->push_extra(ET_USING_INDEX);
25927     if (packed_info & TAB_INFO_USING_WHERE)
25928       eta->push_extra(ET_USING_WHERE);
25929     if (packed_info & TAB_INFO_FULL_SCAN_ON_NULL)
25930       eta->push_extra(ET_FULL_SCAN_ON_NULL_KEY);
25931   }
25932   else
25933   {
25934     uint keyno= MAX_KEY;
25935     if (ref.key_parts)
25936       keyno= ref.key;
25937     else if (tab_select && cur_quick)
25938       keyno = cur_quick->index;
25939 
25940     if (keyno != MAX_KEY && keyno == table->file->pushed_idx_cond_keyno &&
25941         table->file->pushed_idx_cond)
25942     {
25943       eta->push_extra(ET_USING_INDEX_CONDITION);
25944       eta->pushed_index_cond= table->file->pushed_idx_cond;
25945     }
25946     else if (cache_idx_cond)
25947     {
25948       eta->push_extra(ET_USING_INDEX_CONDITION_BKA);
25949       eta->pushed_index_cond= cache_idx_cond;
25950     }
25951 
25952     if (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
25953         quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT ||
25954         quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT ||
25955         quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)
25956     {
25957       eta->push_extra(ET_USING);
25958     }
25959     if (tab_select)
25960     {
25961       if (use_quick == 2)
25962       {
25963         eta->push_extra(ET_RANGE_CHECKED_FOR_EACH_RECORD);
25964         eta->range_checked_fer= new (thd->mem_root) Explain_range_checked_fer;
25965         if (eta->range_checked_fer)
25966           eta->range_checked_fer->
25967             append_possible_keys_stat(thd->mem_root, table, keys);
25968       }
25969       else if (tab_select->cond ||
25970                (cache_select && cache_select->cond))
25971       {
25972         const COND *pushed_cond= table->file->pushed_cond;
25973 
25974         if ((table->file->ha_table_flags() &
25975               HA_CAN_TABLE_CONDITION_PUSHDOWN) &&
25976             pushed_cond)
25977         {
25978           eta->push_extra(ET_USING_WHERE_WITH_PUSHED_CONDITION);
25979         }
25980         else
25981         {
25982           eta->where_cond= tab_select->cond;
25983           eta->cache_cond= cache_select? cache_select->cond : NULL;
25984           eta->push_extra(ET_USING_WHERE);
25985         }
25986       }
25987     }
25988     if (table_list /* SJM bushes don't have table_list */ &&
25989         table_list->schema_table &&
25990         table_list->schema_table->i_s_requested_object & OPTIMIZE_I_S_TABLE)
25991     {
25992       if (!table_list->table_open_method)
25993         eta->push_extra(ET_SKIP_OPEN_TABLE);
25994       else if (table_list->table_open_method == OPEN_FRM_ONLY)
25995         eta->push_extra(ET_OPEN_FRM_ONLY);
25996       else
25997         eta->push_extra(ET_OPEN_FULL_TABLE);
25998       /* psergey-note: the following has a bug.*/
25999       if (table_list->is_table_read_plan->trivial_show_command ||
26000           (table_list->is_table_read_plan->has_db_lookup_value() &&
26001            table_list->is_table_read_plan->has_table_lookup_value()))
26002         eta->push_extra(ET_SCANNED_0_DATABASES);
26003       else if (table_list->is_table_read_plan->has_db_lookup_value() ||
26004                table_list->is_table_read_plan->has_table_lookup_value())
26005         eta->push_extra(ET_SCANNED_1_DATABASE);
26006       else
26007         eta->push_extra(ET_SCANNED_ALL_DATABASES);
26008     }
26009     if (key_read)
26010     {
26011       if (quick_type == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)
26012       {
26013         QUICK_GROUP_MIN_MAX_SELECT *qgs=
26014           (QUICK_GROUP_MIN_MAX_SELECT *) tab_select->quick;
26015         eta->push_extra(ET_USING_INDEX_FOR_GROUP_BY);
26016         eta->loose_scan_is_scanning= qgs->loose_scan_is_scanning();
26017       }
26018       else
26019         eta->push_extra(ET_USING_INDEX);
26020     }
26021     if (table->reginfo.not_exists_optimize)
26022       eta->push_extra(ET_NOT_EXISTS);
26023 
26024     if (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE)
26025     {
26026       explain_append_mrr_info((QUICK_RANGE_SELECT*)(tab_select->quick),
26027                               &eta->mrr_type);
26028       if (eta->mrr_type.length() > 0)
26029         eta->push_extra(ET_USING_MRR);
26030     }
26031 
26032     if (shortcut_for_distinct)
26033       eta->push_extra(ET_DISTINCT);
26034 
26035     if (loosescan_match_tab)
26036     {
26037       eta->push_extra(ET_LOOSESCAN);
26038     }
26039 
26040     if (first_weedout_table)
26041     {
26042       eta->start_dups_weedout= true;
26043       eta->push_extra(ET_START_TEMPORARY);
26044     }
26045     if (check_weed_out_table)
26046     {
26047       eta->push_extra(ET_END_TEMPORARY);
26048       eta->end_dups_weedout= true;
26049     }
26050 
26051     else if (do_firstmatch)
26052     {
26053       if (do_firstmatch == /*join->join_tab*/ first_top_tab - 1)
26054         eta->push_extra(ET_FIRST_MATCH);
26055       else
26056       {
26057         eta->push_extra(ET_FIRST_MATCH);
26058         TABLE *prev_table=do_firstmatch->table;
26059         if (prev_table->derived_select_number)
26060         {
26061           char namebuf[NAME_LEN];
26062           /* Derived table name generation */
26063           size_t len= my_snprintf(namebuf, sizeof(namebuf)-1,
26064                                "<derived%u>",
26065                                prev_table->derived_select_number);
26066           eta->firstmatch_table_name.append(namebuf, len);
26067         }
26068         else
26069           eta->firstmatch_table_name.append(&prev_table->pos_in_table_list->alias);
26070       }
26071     }
26072 
26073     for (uint part= 0; part < ref.key_parts; part++)
26074     {
26075       if (ref.cond_guards[part])
26076       {
26077         eta->push_extra(ET_FULL_SCAN_ON_NULL_KEY);
26078         eta->full_scan_on_null_key= true;
26079         break;
26080       }
26081     }
26082 
26083     if (cache)
26084     {
26085       eta->push_extra(ET_USING_JOIN_BUFFER);
26086       if (cache->save_explain_data(&eta->bka_type))
26087         return 1;
26088     }
26089   }
26090 
26091   /*
26092     In case this is a derived table, here we remember the number of
26093     subselect that used to produce it.
26094   */
26095   if (!(table_list && table_list->is_with_table_recursive_reference()))
26096     eta->derived_select_number= table->derived_select_number;
26097 
26098   /* The same for non-merged semi-joins */
26099   eta->non_merged_sjm_number = get_non_merged_semijoin_select();
26100 
26101   return 0;
26102 }
26103 
26104 
26105 /*
26106   Walk through join->aggr_tables and save aggregation/grouping query plan into
26107   an Explain_select object
26108 
26109   @retval
26110   0 ok
26111   1 error
26112 */
26113 
save_agg_explain_data(JOIN * join,Explain_select * xpl_sel)26114 bool save_agg_explain_data(JOIN *join, Explain_select *xpl_sel)
26115 {
26116   JOIN_TAB *join_tab=join->join_tab + join->exec_join_tab_cnt();
26117   Explain_aggr_node *prev_node;
26118   Explain_aggr_node *node= xpl_sel->aggr_tree;
26119   bool is_analyze= join->thd->lex->analyze_stmt;
26120   THD *thd= join->thd;
26121 
26122   for (uint i= 0; i < join->aggr_tables; i++, join_tab++)
26123   {
26124     // Each aggregate means a temp.table
26125     prev_node= node;
26126     if (!(node= new (thd->mem_root) Explain_aggr_tmp_table))
26127       return 1;
26128     node->child= prev_node;
26129 
26130     if (join_tab->window_funcs_step)
26131     {
26132       Explain_aggr_node *new_node=
26133         join_tab->window_funcs_step->save_explain_plan(thd->mem_root,
26134                                                        is_analyze);
26135       if (!new_node)
26136         return 1;
26137 
26138       prev_node=node;
26139       node= new_node;
26140       node->child= prev_node;
26141     }
26142 
26143     /* The below matches execution in join_init_read_record() */
26144     if (join_tab->distinct)
26145     {
26146       prev_node= node;
26147       if (!(node= new (thd->mem_root) Explain_aggr_remove_dups))
26148         return 1;
26149       node->child= prev_node;
26150     }
26151 
26152     if (join_tab->filesort)
26153     {
26154       Explain_aggr_filesort *eaf =
26155         new (thd->mem_root) Explain_aggr_filesort(thd->mem_root, is_analyze, join_tab->filesort);
26156       if (!eaf)
26157         return 1;
26158       prev_node= node;
26159       node= eaf;
26160       node->child= prev_node;
26161     }
26162   }
26163   xpl_sel->aggr_tree= node;
26164   return 0;
26165 }
26166 
26167 
26168 /**
26169   Save Query Plan Footprint
26170 
26171   @note
26172     Currently, this function may be called multiple times
26173 
26174   @retval
26175   0 ok
26176   1 error
26177 */
26178 
save_explain_data_intern(Explain_query * output,bool need_tmp_table_arg,bool need_order_arg,bool distinct_arg,const char * message)26179 int JOIN::save_explain_data_intern(Explain_query *output,
26180                                    bool need_tmp_table_arg,
26181                                    bool need_order_arg, bool distinct_arg,
26182                                    const char *message)
26183 {
26184   JOIN *join= this; /* Legacy: this code used to be a non-member function */
26185   DBUG_ENTER("JOIN::save_explain_data_intern");
26186   DBUG_PRINT("info", ("Select %p, type %s, message %s",
26187 		      join->select_lex, join->select_lex->type,
26188 		      message ? message : "NULL"));
26189   DBUG_ASSERT(have_query_plan == QEP_AVAILABLE);
26190   /* fake_select_lex is created/printed by Explain_union */
26191   DBUG_ASSERT(join->select_lex != join->unit->fake_select_lex);
26192 
26193   /* There should be no attempts to save query plans for merged selects */
26194   DBUG_ASSERT(!join->select_lex->master_unit()->derived ||
26195               join->select_lex->master_unit()->derived->is_materialized_derived() ||
26196               join->select_lex->master_unit()->derived->is_with_table());
26197 
26198   /* Don't log this into the slow query log */
26199 
26200   if (message)
26201   {
26202     if (!(explain= new (output->mem_root)
26203           Explain_select(output->mem_root,
26204                          thd->lex->analyze_stmt)))
26205       DBUG_RETURN(1);
26206 #ifndef DBUG_OFF
26207     explain->select_lex= select_lex;
26208 #endif
26209     join->select_lex->set_explain_type(true);
26210 
26211     explain->select_id= join->select_lex->select_number;
26212     explain->select_type= join->select_lex->type;
26213     explain->linkage= select_lex->linkage;
26214     explain->using_temporary= need_tmp;
26215     explain->using_filesort=  need_order_arg;
26216     /* Setting explain->message means that all other members are invalid */
26217     explain->message= message;
26218 
26219     if (select_lex->master_unit()->derived)
26220       explain->connection_type= Explain_node::EXPLAIN_NODE_DERIVED;
26221     if (save_agg_explain_data(this, explain))
26222       DBUG_RETURN(1);
26223 
26224     output->add_node(explain);
26225   }
26226   else if (pushdown_query)
26227   {
26228     if (!(explain= new (output->mem_root)
26229           Explain_select(output->mem_root,
26230                          thd->lex->analyze_stmt)))
26231       DBUG_RETURN(1);
26232     select_lex->set_explain_type(true);
26233 
26234     explain->select_id=   select_lex->select_number;
26235     explain->select_type= select_lex->type;
26236     explain->linkage= select_lex->linkage;
26237     explain->using_temporary= need_tmp;
26238     explain->using_filesort=  need_order_arg;
26239     explain->message= "Storage engine handles GROUP BY";
26240 
26241     if (select_lex->master_unit()->derived)
26242       explain->connection_type= Explain_node::EXPLAIN_NODE_DERIVED;
26243     output->add_node(explain);
26244   }
26245   else
26246   {
26247     Explain_select *xpl_sel;
26248     explain= xpl_sel=
26249       new (output->mem_root) Explain_select(output->mem_root,
26250                                             thd->lex->analyze_stmt);
26251     if (!explain)
26252       DBUG_RETURN(1);
26253 
26254     table_map used_tables=0;
26255 
26256     join->select_lex->set_explain_type(true);
26257     xpl_sel->select_id= join->select_lex->select_number;
26258     xpl_sel->select_type= join->select_lex->type;
26259     xpl_sel->linkage= select_lex->linkage;
26260     xpl_sel->is_lateral= ((select_lex->linkage == DERIVED_TABLE_TYPE) &&
26261                           (select_lex->uncacheable & UNCACHEABLE_DEPENDENT));
26262     if (select_lex->master_unit()->derived)
26263       xpl_sel->connection_type= Explain_node::EXPLAIN_NODE_DERIVED;
26264 
26265     if (save_agg_explain_data(this, xpl_sel))
26266       DBUG_RETURN(1);
26267 
26268     xpl_sel->exec_const_cond= exec_const_cond;
26269     xpl_sel->outer_ref_cond= outer_ref_cond;
26270     xpl_sel->pseudo_bits_cond= pseudo_bits_cond;
26271     if (tmp_having)
26272       xpl_sel->having= tmp_having;
26273     else
26274       xpl_sel->having= having;
26275     xpl_sel->having_value= having_value;
26276 
26277     JOIN_TAB* const first_top_tab= join->first_breadth_first_tab();
26278     JOIN_TAB* prev_bush_root_tab= NULL;
26279 
26280     Explain_basic_join *cur_parent= xpl_sel;
26281 
26282     for (JOIN_TAB *tab= first_explain_order_tab(join); tab;
26283          tab= next_explain_order_tab(join, tab))
26284     {
26285       JOIN_TAB *saved_join_tab= NULL;
26286       TABLE *cur_table= tab->table;
26287 
26288       /* Don't show eliminated tables */
26289       if (cur_table->map & join->eliminated_tables)
26290       {
26291         used_tables|= cur_table->map;
26292         continue;
26293       }
26294 
26295 
26296       Explain_table_access *eta= (new (output->mem_root)
26297                                   Explain_table_access(output->mem_root));
26298 
26299       if (!eta)
26300         DBUG_RETURN(1);
26301       if (tab->bush_root_tab != prev_bush_root_tab)
26302       {
26303         if (tab->bush_root_tab)
26304         {
26305           /*
26306             We've entered an SJ-Materialization nest. Create an object for it.
26307           */
26308           if (!(cur_parent=
26309                 new (output->mem_root) Explain_basic_join(output->mem_root)))
26310             DBUG_RETURN(1);
26311 
26312           JOIN_TAB *first_child= tab->bush_root_tab->bush_children->start;
26313           cur_parent->select_id=
26314             first_child->emb_sj_nest->sj_subq_pred->get_identifier();
26315         }
26316         else
26317         {
26318           /*
26319             We've just left an SJ-Materialization nest. We are at the join tab
26320             that 'embeds the nest'
26321           */
26322           DBUG_ASSERT(tab->bush_children);
26323           eta->sjm_nest= cur_parent;
26324           cur_parent= xpl_sel;
26325         }
26326       }
26327       prev_bush_root_tab= tab->bush_root_tab;
26328 
26329       cur_parent->add_table(eta, output);
26330       if (tab->save_explain_data(eta, used_tables, distinct_arg, first_top_tab))
26331         DBUG_RETURN(1);
26332 
26333       if (saved_join_tab)
26334         tab= saved_join_tab;
26335 
26336       // For next iteration
26337       used_tables|= cur_table->map;
26338     }
26339     output->add_node(xpl_sel);
26340   }
26341 
26342   for (SELECT_LEX_UNIT *tmp_unit= join->select_lex->first_inner_unit();
26343        tmp_unit;
26344        tmp_unit= tmp_unit->next_unit())
26345   {
26346     /*
26347       Display subqueries only if
26348       (1) they are not parts of ON clauses that were eliminated by table
26349           elimination.
26350       (2) they are not merged derived tables
26351       (3) they are not hanging CTEs (they are needed for execution)
26352     */
26353     if (!(tmp_unit->item && tmp_unit->item->eliminated) &&    // (1)
26354         (!tmp_unit->derived ||
26355          tmp_unit->derived->is_materialized_derived()) &&     // (2)
26356         (!tmp_unit->with_element  ||
26357          (tmp_unit->derived &&
26358           tmp_unit->derived->derived_result &&
26359           !tmp_unit->with_element->is_hanging_recursive())))  // (3)
26360    {
26361       explain->add_child(tmp_unit->first_select()->select_number);
26362     }
26363   }
26364 
26365   if (select_lex->is_top_level_node())
26366     output->query_plan_ready();
26367 
26368   DBUG_RETURN(0);
26369 }
26370 
26371 
26372 /*
26373   This function serves as "shortcut point" for EXPLAIN queries.
26374 
26375   The EXPLAIN statement executes just like its SELECT counterpart would
26376   execute, except that JOIN::exec() will call select_describe() instead of
26377   actually executing the query.
26378 
26379   Inside select_describe():
26380   - Query plan is updated with latest QEP choices made at the start of
26381     JOIN::exec().
26382   - the proces of "almost execution" is invoked for the children subqueries.
26383 
26384   Overall, select_describe() is a legacy of old EXPLAIN implementation and
26385   should be removed.
26386 */
26387 
select_describe(JOIN * join,bool need_tmp_table,bool need_order,bool distinct,const char * message)26388 static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
26389 			    bool distinct,const char *message)
26390 {
26391   THD *thd=join->thd;
26392   select_result *result=join->result;
26393   DBUG_ENTER("select_describe");
26394 
26395   /* Update the QPF with latest values of using_temporary, using_filesort */
26396   for (SELECT_LEX_UNIT *unit= join->select_lex->first_inner_unit();
26397        unit;
26398        unit= unit->next_unit())
26399   {
26400     /*
26401       This fix_fields() call is to handle an edge case like this:
26402 
26403         SELECT ... UNION SELECT ... ORDER BY (SELECT ...)
26404 
26405       for such queries, we'll get here before having called
26406       subquery_expr->fix_fields(), which will cause failure to
26407     */
26408     if (unit->item && !unit->item->fixed)
26409     {
26410       Item *ref= unit->item;
26411       if (unit->item->fix_fields(thd, &ref))
26412         DBUG_VOID_RETURN;
26413       DBUG_ASSERT(ref == unit->item);
26414     }
26415 
26416     /*
26417       Save plans for child subqueries, when
26418       (1) they are not parts of eliminated WHERE/ON clauses.
26419       (2) they are not VIEWs that were "merged for INSERT".
26420       (3) they are not hanging CTEs (they are needed for execution)
26421     */
26422     if (!(unit->item && unit->item->eliminated) &&                     // (1)
26423         !(unit->derived && unit->derived->merged_for_insert) &&        // (2)
26424         (!unit->with_element ||
26425           (unit->derived &&
26426            unit->derived->derived_result &&
26427            !unit->with_element->is_hanging_recursive())))              // (3)
26428     {
26429       if (mysql_explain_union(thd, unit, result))
26430         DBUG_VOID_RETURN;
26431     }
26432   }
26433   DBUG_VOID_RETURN;
26434 }
26435 
26436 
mysql_explain_union(THD * thd,SELECT_LEX_UNIT * unit,select_result * result)26437 bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, select_result *result)
26438 {
26439   DBUG_ENTER("mysql_explain_union");
26440   bool res= 0;
26441   SELECT_LEX *first= unit->first_select();
26442 
26443   for (SELECT_LEX *sl= first; sl; sl= sl->next_select())
26444   {
26445     sl->set_explain_type(FALSE);
26446     sl->options|= SELECT_DESCRIBE;
26447   }
26448 
26449   if (unit->is_unit_op() || unit->fake_select_lex)
26450   {
26451     if (unit->union_needs_tmp_table() && unit->fake_select_lex)
26452     {
26453       unit->fake_select_lex->select_number= FAKE_SELECT_LEX_ID; // just for initialization
26454       unit->fake_select_lex->type= unit_operation_text[unit->common_op()];
26455       unit->fake_select_lex->options|= SELECT_DESCRIBE;
26456     }
26457     if (!(res= unit->prepare(unit->derived, result,
26458                              SELECT_NO_UNLOCK | SELECT_DESCRIBE)))
26459       res= unit->exec();
26460   }
26461   else
26462   {
26463     thd->lex->current_select= first;
26464     unit->set_limit(unit->global_parameters());
26465     res= mysql_select(thd,
26466                       first->table_list.first,
26467                       first->with_wild, first->item_list,
26468                       first->where,
26469                       first->order_list.elements + first->group_list.elements,
26470                       first->order_list.first,
26471                       first->group_list.first,
26472                       first->having,
26473                       thd->lex->proc_list.first,
26474                       first->options | thd->variables.option_bits | SELECT_DESCRIBE,
26475                       result, unit, first);
26476   }
26477   DBUG_RETURN(res || thd->is_error());
26478 }
26479 
26480 
print_table_array(THD * thd,table_map eliminated_tables,String * str,TABLE_LIST ** table,TABLE_LIST ** end,enum_query_type query_type)26481 static void print_table_array(THD *thd,
26482                               table_map eliminated_tables,
26483                               String *str, TABLE_LIST **table,
26484                               TABLE_LIST **end,
26485                               enum_query_type query_type)
26486 {
26487   (*table)->print(thd, eliminated_tables, str, query_type);
26488 
26489   for (TABLE_LIST **tbl= table + 1; tbl < end; tbl++)
26490   {
26491     TABLE_LIST *curr= *tbl;
26492 
26493     /*
26494       The "eliminated_tables &&" check guards againist the case of
26495       printing the query for CREATE VIEW. We do that without having run
26496       JOIN::optimize() and so will have nested_join->used_tables==0.
26497     */
26498     if (eliminated_tables &&
26499         ((curr->table && (curr->table->map & eliminated_tables)) ||
26500          (curr->nested_join && !(curr->nested_join->used_tables &
26501                                 ~eliminated_tables))))
26502     {
26503       /* as of 5.5, print_join doesnt put eliminated elements into array */
26504       DBUG_ASSERT(0);
26505       continue;
26506     }
26507 
26508     /* JOIN_TYPE_OUTER is just a marker unrelated to real join */
26509     if (curr->outer_join & (JOIN_TYPE_LEFT|JOIN_TYPE_RIGHT))
26510     {
26511       /* MySQL converts right to left joins */
26512       str->append(STRING_WITH_LEN(" left join "));
26513     }
26514     else if (curr->straight)
26515       str->append(STRING_WITH_LEN(" straight_join "));
26516     else if (curr->sj_inner_tables)
26517       str->append(STRING_WITH_LEN(" semi join "));
26518     else
26519       str->append(STRING_WITH_LEN(" join "));
26520 
26521     curr->print(thd, eliminated_tables, str, query_type);
26522     if (curr->on_expr)
26523     {
26524       str->append(STRING_WITH_LEN(" on("));
26525       curr->on_expr->print(str, query_type);
26526       str->append(')');
26527     }
26528   }
26529 }
26530 
26531 
26532 /*
26533   Check if the passed table is
26534    - a base table which was eliminated, or
26535    - a join nest which only contained eliminated tables (and so was eliminated,
26536      too)
26537 */
26538 
is_eliminated_table(table_map eliminated_tables,TABLE_LIST * tbl)26539 static bool is_eliminated_table(table_map eliminated_tables, TABLE_LIST *tbl)
26540 {
26541   return eliminated_tables &&
26542     ((tbl->table && (tbl->table->map & eliminated_tables)) ||
26543      (tbl->nested_join && !(tbl->nested_join->used_tables &
26544                             ~eliminated_tables)));
26545 }
26546 
26547 /**
26548   Print joins from the FROM clause.
26549 
26550   @param thd     thread handler
26551   @param str     string where table should be printed
26552   @param tables  list of tables in join
26553   @query_type    type of the query is being generated
26554 */
26555 
print_join(THD * thd,table_map eliminated_tables,String * str,List<TABLE_LIST> * tables,enum_query_type query_type)26556 static void print_join(THD *thd,
26557                        table_map eliminated_tables,
26558                        String *str,
26559                        List<TABLE_LIST> *tables,
26560                        enum_query_type query_type)
26561 {
26562   /* List is reversed => we should reverse it before using */
26563   List_iterator_fast<TABLE_LIST> ti(*tables);
26564   TABLE_LIST **table;
26565   DBUG_ENTER("print_join");
26566 
26567   /*
26568     If the QT_NO_DATA_EXPANSION flag is specified, we print the
26569     original table list, including constant tables that have been
26570     optimized away, as the constant tables may be referenced in the
26571     expression printed by Item_field::print() when this flag is given.
26572     Otherwise, only non-const tables are printed.
26573 
26574     Example:
26575 
26576     Original SQL:
26577     select * from (select 1) t
26578 
26579     Printed without QT_NO_DATA_EXPANSION:
26580     select '1' AS `1` from dual
26581 
26582     Printed with QT_NO_DATA_EXPANSION:
26583     select `t`.`1` from (select 1 AS `1`) `t`
26584   */
26585   const bool print_const_tables= (query_type & QT_NO_DATA_EXPANSION);
26586   size_t tables_to_print= 0;
26587 
26588   for (TABLE_LIST *t= ti++; t ; t= ti++)
26589   {
26590     /* See comment in print_table_array() about the second condition */
26591     if (print_const_tables || !t->optimized_away)
26592       if (!is_eliminated_table(eliminated_tables, t))
26593         tables_to_print++;
26594   }
26595   if (tables_to_print == 0)
26596   {
26597     str->append(STRING_WITH_LEN("dual"));
26598     DBUG_VOID_RETURN;                   // all tables were optimized away
26599   }
26600   ti.rewind();
26601 
26602   if (!(table= static_cast<TABLE_LIST **>(thd->alloc(sizeof(TABLE_LIST*) *
26603                                                      tables_to_print))))
26604     DBUG_VOID_RETURN;                   // out of memory
26605 
26606   TABLE_LIST *tmp, **t= table + (tables_to_print - 1);
26607   while ((tmp= ti++))
26608   {
26609     if (tmp->optimized_away && !print_const_tables)
26610       continue;
26611     if (is_eliminated_table(eliminated_tables, tmp))
26612       continue;
26613     *t--= tmp;
26614   }
26615 
26616   DBUG_ASSERT(tables->elements >= 1);
26617   /*
26618     Assert that the first table in the list isn't eliminated. This comes from
26619     the fact that the first table can't be inner table of an outer join.
26620   */
26621   DBUG_ASSERT(!eliminated_tables ||
26622               !(((*table)->table && ((*table)->table->map & eliminated_tables)) ||
26623                 ((*table)->nested_join && !((*table)->nested_join->used_tables &
26624                                            ~eliminated_tables))));
26625   /*
26626     If the first table is a semi-join nest, swap it with something that is
26627     not a semi-join nest.
26628   */
26629   if ((*table)->sj_inner_tables)
26630   {
26631     TABLE_LIST **end= table + tables_to_print;
26632     for (TABLE_LIST **t2= table; t2!=end; t2++)
26633     {
26634       if (!(*t2)->sj_inner_tables)
26635       {
26636         tmp= *t2;
26637         *t2= *table;
26638         *table= tmp;
26639         break;
26640       }
26641     }
26642   }
26643   print_table_array(thd, eliminated_tables, str, table,
26644                     table +  tables_to_print, query_type);
26645   DBUG_VOID_RETURN;
26646 }
26647 
26648 /**
26649   @brief Print an index hint
26650 
26651   @details Prints out the USE|FORCE|IGNORE index hint.
26652 
26653   @param      thd         the current thread
26654   @param[out] str         appends the index hint here
26655   @param      hint        what the hint is (as string : "USE INDEX"|
26656                           "FORCE INDEX"|"IGNORE INDEX")
26657   @param      hint_length the length of the string in 'hint'
26658   @param      indexes     a list of index names for the hint
26659 */
26660 
26661 void
print(THD * thd,String * str)26662 Index_hint::print(THD *thd, String *str)
26663 {
26664   switch (type)
26665   {
26666     case INDEX_HINT_IGNORE: str->append(STRING_WITH_LEN("IGNORE INDEX")); break;
26667     case INDEX_HINT_USE:    str->append(STRING_WITH_LEN("USE INDEX")); break;
26668     case INDEX_HINT_FORCE:  str->append(STRING_WITH_LEN("FORCE INDEX")); break;
26669   }
26670   str->append (STRING_WITH_LEN(" ("));
26671   if (key_name.length)
26672   {
26673     if (thd && !my_strnncoll(system_charset_info,
26674                              (const uchar *)key_name.str, key_name.length,
26675                              (const uchar *)primary_key_name,
26676                              strlen(primary_key_name)))
26677       str->append(primary_key_name);
26678     else
26679       append_identifier(thd, str, &key_name);
26680 }
26681   str->append(')');
26682 }
26683 
26684 
26685 /**
26686   Print table as it should be in join list.
26687 
26688   @param str   string where table should be printed
26689 */
26690 
print(THD * thd,table_map eliminated_tables,String * str,enum_query_type query_type)26691 void TABLE_LIST::print(THD *thd, table_map eliminated_tables, String *str,
26692                        enum_query_type query_type)
26693 {
26694   if (nested_join)
26695   {
26696     str->append('(');
26697     print_join(thd, eliminated_tables, str, &nested_join->join_list, query_type);
26698     str->append(')');
26699   }
26700   else if (jtbm_subselect)
26701   {
26702     if (jtbm_subselect->engine->engine_type() ==
26703           subselect_engine::SINGLE_SELECT_ENGINE)
26704     {
26705       /*
26706         We get here when conversion into materialization didn't finish (this
26707         happens when
26708         - The subquery is a degenerate case which produces 0 or 1 record
26709         - subquery's optimization didn't finish because of @@max_join_size
26710           limits
26711         - ... maybe some other cases like this
26712       */
26713       str->append(STRING_WITH_LEN(" <materialize> ("));
26714       jtbm_subselect->engine->print(str, query_type);
26715       str->append(')');
26716     }
26717     else
26718     {
26719       str->append(STRING_WITH_LEN(" <materialize> ("));
26720       subselect_hash_sj_engine *hash_engine;
26721       hash_engine= (subselect_hash_sj_engine*)jtbm_subselect->engine;
26722       hash_engine->materialize_engine->print(str, query_type);
26723       str->append(')');
26724     }
26725   }
26726   else
26727   {
26728     const char *cmp_name;                         // Name to compare with alias
26729     if (view_name.str)
26730     {
26731       // A view
26732 
26733       if (!(belong_to_view &&
26734             belong_to_view->compact_view_format))
26735       {
26736         append_identifier(thd, str, &view_db);
26737         str->append('.');
26738       }
26739       append_identifier(thd, str, &view_name);
26740       cmp_name= view_name.str;
26741     }
26742     else if (derived)
26743     {
26744       if (!is_with_table())
26745       {
26746         // A derived table
26747         str->append('(');
26748         derived->print(str, query_type);
26749         str->append(')');
26750         cmp_name= "";                               // Force printing of alias
26751       }
26752       else
26753       {
26754         append_identifier(thd, str, &table_name);
26755         cmp_name= table_name.str;
26756       }
26757     }
26758     else
26759     {
26760       // A normal table
26761 
26762       if (!(belong_to_view &&
26763             belong_to_view->compact_view_format))
26764       {
26765         append_identifier(thd, str, &db);
26766         str->append('.');
26767       }
26768       if (schema_table)
26769       {
26770         append_identifier(thd, str, &schema_table_name);
26771         cmp_name= schema_table_name.str;
26772       }
26773       else
26774       {
26775         append_identifier(thd, str, &table_name);
26776         cmp_name= table_name.str;
26777       }
26778 #ifdef WITH_PARTITION_STORAGE_ENGINE
26779       if (partition_names && partition_names->elements)
26780       {
26781         int i, num_parts= partition_names->elements;
26782         List_iterator<String> name_it(*(partition_names));
26783         str->append(STRING_WITH_LEN(" PARTITION ("));
26784         for (i= 1; i <= num_parts; i++)
26785         {
26786           String *name= name_it++;
26787           append_identifier(thd, str, name->c_ptr(), name->length());
26788           if (i != num_parts)
26789             str->append(',');
26790         }
26791         str->append(')');
26792       }
26793 #endif /* WITH_PARTITION_STORAGE_ENGINE */
26794     }
26795     if (table && table->versioned())
26796       vers_conditions.print(str, query_type);
26797 
26798     if (my_strcasecmp(table_alias_charset, cmp_name, alias.str))
26799     {
26800       char t_alias_buff[MAX_ALIAS_NAME];
26801       LEX_CSTRING t_alias= alias;
26802 
26803       str->append(' ');
26804       if (lower_case_table_names == 1)
26805       {
26806         if (alias.str && alias.str[0])
26807         {
26808           strmov(t_alias_buff, alias.str);
26809           t_alias.length= my_casedn_str(files_charset_info, t_alias_buff);
26810           t_alias.str= t_alias_buff;
26811         }
26812       }
26813 
26814       append_identifier(thd, str, &t_alias);
26815     }
26816 
26817     if (index_hints)
26818     {
26819       List_iterator<Index_hint> it(*index_hints);
26820       Index_hint *hint;
26821 
26822       while ((hint= it++))
26823       {
26824         str->append (STRING_WITH_LEN(" "));
26825         hint->print (thd, str);
26826       }
26827     }
26828   }
26829 }
26830 
26831 
print(THD * thd,String * str,enum_query_type query_type)26832 void st_select_lex::print(THD *thd, String *str, enum_query_type query_type)
26833 {
26834   DBUG_ASSERT(thd);
26835 
26836   if (tvc)
26837   {
26838     tvc->print(thd, str, query_type);
26839     return;
26840   }
26841 
26842   if ((query_type & QT_SHOW_SELECT_NUMBER) &&
26843       thd->lex->all_selects_list &&
26844       thd->lex->all_selects_list->link_next &&
26845       select_number != UINT_MAX &&
26846       select_number != INT_MAX)
26847   {
26848     str->append("/* select#");
26849     str->append_ulonglong(select_number);
26850     str->append(" */ ");
26851   }
26852 
26853   str->append(STRING_WITH_LEN("select "));
26854 
26855   if (join && join->cleaned)
26856   {
26857     /*
26858       JOIN already cleaned up so it is dangerous to print items
26859       because temporary tables they pointed on could be freed.
26860     */
26861     str->append('#');
26862     str->append(select_number);
26863     return;
26864   }
26865 
26866   /* First add options */
26867   if (options & SELECT_STRAIGHT_JOIN)
26868     str->append(STRING_WITH_LEN("straight_join "));
26869   if (options & SELECT_HIGH_PRIORITY)
26870     str->append(STRING_WITH_LEN("high_priority "));
26871   if (options & SELECT_DISTINCT)
26872     str->append(STRING_WITH_LEN("distinct "));
26873   if (options & SELECT_SMALL_RESULT)
26874     str->append(STRING_WITH_LEN("sql_small_result "));
26875   if (options & SELECT_BIG_RESULT)
26876     str->append(STRING_WITH_LEN("sql_big_result "));
26877   if (options & OPTION_BUFFER_RESULT)
26878     str->append(STRING_WITH_LEN("sql_buffer_result "));
26879   if (options & OPTION_FOUND_ROWS)
26880     str->append(STRING_WITH_LEN("sql_calc_found_rows "));
26881   switch (sql_cache)
26882   {
26883     case SQL_NO_CACHE:
26884       str->append(STRING_WITH_LEN("sql_no_cache "));
26885       break;
26886     case SQL_CACHE:
26887       str->append(STRING_WITH_LEN("sql_cache "));
26888       break;
26889     case SQL_CACHE_UNSPECIFIED:
26890       break;
26891     default:
26892       DBUG_ASSERT(0);
26893   }
26894 
26895   //Item List
26896   bool first= 1;
26897   /*
26898     outer_select() can not be used here because it is for name resolution
26899     and will return NULL at any end of name resolution chain (view/derived)
26900   */
26901   bool top_level= (get_master()->get_master() == 0);
26902   List_iterator_fast<Item> it(item_list);
26903   Item *item;
26904   while ((item= it++))
26905   {
26906     if (first)
26907       first= 0;
26908     else
26909       str->append(',');
26910 
26911     if ((is_subquery_function() && item->is_autogenerated_name) ||
26912         !item->name.str)
26913     {
26914       /*
26915         Do not print auto-generated aliases in subqueries. It has no purpose
26916         in a view definition or other contexts where the query is printed.
26917       */
26918       item->print(str, query_type);
26919     }
26920     else
26921     {
26922       /*
26923         Do not print illegal names (if it is not top level SELECT).
26924         Top level view checked (and correct name are assigned),
26925         other cases of top level SELECT are not important, because
26926         it is not "table field".
26927       */
26928       if (top_level ||
26929           !item->is_autogenerated_name ||
26930           !check_column_name(item->name.str))
26931         item->print_item_w_name(str, query_type);
26932       else
26933         item->print(str, query_type);
26934     }
26935   }
26936 
26937   /*
26938     from clause
26939     TODO: support USING/FORCE/IGNORE index
26940   */
26941   if (table_list.elements)
26942   {
26943     str->append(STRING_WITH_LEN(" from "));
26944     /* go through join tree */
26945     print_join(thd, join? join->eliminated_tables: 0, str, &top_join_list, query_type);
26946   }
26947   else if (where)
26948   {
26949     /*
26950       "SELECT 1 FROM DUAL WHERE 2" should not be printed as
26951       "SELECT 1 WHERE 2": the 1st syntax is valid, but the 2nd is not.
26952     */
26953     str->append(STRING_WITH_LEN(" from DUAL "));
26954   }
26955 
26956   // Where
26957   Item *cur_where= where;
26958   if (join)
26959     cur_where= join->conds;
26960   if (cur_where || cond_value != Item::COND_UNDEF)
26961   {
26962     str->append(STRING_WITH_LEN(" where "));
26963     if (cur_where)
26964       cur_where->print(str, query_type);
26965     else
26966       str->append(cond_value != Item::COND_FALSE ? "1" : "0");
26967   }
26968 
26969   // group by & olap
26970   if (group_list.elements)
26971   {
26972     str->append(STRING_WITH_LEN(" group by "));
26973     print_order(str, group_list.first, query_type);
26974     switch (olap)
26975     {
26976       case CUBE_TYPE:
26977 	str->append(STRING_WITH_LEN(" with cube"));
26978 	break;
26979       case ROLLUP_TYPE:
26980 	str->append(STRING_WITH_LEN(" with rollup"));
26981 	break;
26982       default:
26983 	;  //satisfy compiler
26984     }
26985   }
26986 
26987   // having
26988   Item *cur_having= having;
26989   if (join)
26990     cur_having= join->having;
26991 
26992   if (cur_having || having_value != Item::COND_UNDEF)
26993   {
26994     str->append(STRING_WITH_LEN(" having "));
26995     if (cur_having)
26996       cur_having->print(str, query_type);
26997     else
26998       str->append(having_value != Item::COND_FALSE ? "1" : "0");
26999   }
27000 
27001   if (order_list.elements)
27002   {
27003     str->append(STRING_WITH_LEN(" order by "));
27004     print_order(str, order_list.first, query_type);
27005   }
27006 
27007   // limit
27008   print_limit(thd, str, query_type);
27009 
27010   // lock type
27011   if (lock_type == TL_READ_WITH_SHARED_LOCKS)
27012     str->append(" lock in share mode");
27013   else if (lock_type == TL_WRITE)
27014     str->append(" for update");
27015 
27016   // PROCEDURE unsupported here
27017 }
27018 
27019 
27020 /**
27021   Change the select_result object of the JOIN.
27022 
27023   If old_result is not used, forward the call to the current
27024   select_result in case it is a wrapper around old_result.
27025 
27026   Call prepare() and prepare2() on the new select_result if we decide
27027   to use it.
27028 
27029   @param new_result New select_result object
27030   @param old_result Old select_result object (NULL to force change)
27031 
27032   @retval false Success
27033   @retval true  Error
27034 */
27035 
change_result(select_result * new_result,select_result * old_result)27036 bool JOIN::change_result(select_result *new_result, select_result *old_result)
27037 {
27038   DBUG_ENTER("JOIN::change_result");
27039   if (old_result == NULL || result == old_result)
27040   {
27041     result= new_result;
27042     if (result->prepare(fields_list, select_lex->master_unit()) ||
27043         result->prepare2(this))
27044       DBUG_RETURN(true); /* purecov: inspected */
27045     DBUG_RETURN(false);
27046   }
27047   DBUG_RETURN(result->change_result(new_result));
27048 }
27049 
27050 
27051 /**
27052   @brief
27053   Set allowed types of join caches that can be used for join operations
27054 
27055   @details
27056   The function sets a bitmap of allowed join buffers types in the field
27057   allowed_join_cache_types of this JOIN structure:
27058     bit 1 is set if tjoin buffers are allowed to be incremental
27059     bit 2 is set if the join buffers are allowed to be hashed
27060     but 3 is set if the join buffers are allowed to be used for BKA
27061   join algorithms.
27062   The allowed types are read from system variables.
27063   Besides the function sets maximum allowed join cache level that is
27064   also read from a system variable.
27065 */
27066 
set_allowed_join_cache_types()27067 void JOIN::set_allowed_join_cache_types()
27068 {
27069   allowed_join_cache_types= 0;
27070   if (optimizer_flag(thd, OPTIMIZER_SWITCH_JOIN_CACHE_INCREMENTAL))
27071     allowed_join_cache_types|= JOIN_CACHE_INCREMENTAL_BIT;
27072   if (optimizer_flag(thd, OPTIMIZER_SWITCH_JOIN_CACHE_HASHED))
27073     allowed_join_cache_types|= JOIN_CACHE_HASHED_BIT;
27074   if (optimizer_flag(thd, OPTIMIZER_SWITCH_JOIN_CACHE_BKA))
27075     allowed_join_cache_types|= JOIN_CACHE_BKA_BIT;
27076   allowed_semijoin_with_cache=
27077     optimizer_flag(thd, OPTIMIZER_SWITCH_SEMIJOIN_WITH_CACHE);
27078   allowed_outer_join_with_cache=
27079     optimizer_flag(thd, OPTIMIZER_SWITCH_OUTER_JOIN_WITH_CACHE);
27080   max_allowed_join_cache_level= thd->variables.join_cache_level;
27081 }
27082 
27083 
27084 /**
27085   Save a query execution plan so that the caller can revert to it if needed,
27086   and reset the current query plan so that it can be reoptimized.
27087 
27088   @param save_to  The object into which the current query plan state is saved
27089 */
27090 
save_query_plan(Join_plan_state * save_to)27091 void JOIN::save_query_plan(Join_plan_state *save_to)
27092 {
27093   DYNAMIC_ARRAY tmp_keyuse;
27094   /* Swap the current and the backup keyuse internal arrays. */
27095   tmp_keyuse= keyuse;
27096   keyuse= save_to->keyuse; /* keyuse is reset to an empty array. */
27097   save_to->keyuse= tmp_keyuse;
27098 
27099   for (uint i= 0; i < table_count; i++)
27100   {
27101     save_to->join_tab_keyuse[i]= join_tab[i].keyuse;
27102     join_tab[i].keyuse= NULL;
27103     save_to->join_tab_checked_keys[i]= join_tab[i].checked_keys;
27104     join_tab[i].checked_keys.clear_all();
27105   }
27106   memcpy((uchar*) save_to->best_positions, (uchar*) best_positions,
27107          sizeof(POSITION) * (table_count + 1));
27108   memset((uchar*) best_positions, 0, sizeof(POSITION) * (table_count + 1));
27109 
27110   /* Save SJM nests */
27111   List_iterator<TABLE_LIST> it(select_lex->sj_nests);
27112   TABLE_LIST *tlist;
27113   SJ_MATERIALIZATION_INFO **p_info= save_to->sj_mat_info;
27114   while ((tlist= it++))
27115   {
27116     *(p_info++)= tlist->sj_mat_info;
27117   }
27118 }
27119 
27120 
27121 /**
27122   Reset a query execution plan so that it can be reoptimized in-place.
27123 */
reset_query_plan()27124 void JOIN::reset_query_plan()
27125 {
27126   for (uint i= 0; i < table_count; i++)
27127   {
27128     join_tab[i].keyuse= NULL;
27129     join_tab[i].checked_keys.clear_all();
27130   }
27131 }
27132 
27133 
27134 /**
27135   Restore a query execution plan previously saved by the caller.
27136 
27137   @param The object from which the current query plan state is restored.
27138 */
27139 
restore_query_plan(Join_plan_state * restore_from)27140 void JOIN::restore_query_plan(Join_plan_state *restore_from)
27141 {
27142   DYNAMIC_ARRAY tmp_keyuse;
27143   tmp_keyuse= keyuse;
27144   keyuse= restore_from->keyuse;
27145   restore_from->keyuse= tmp_keyuse;
27146 
27147   for (uint i= 0; i < table_count; i++)
27148   {
27149     join_tab[i].keyuse= restore_from->join_tab_keyuse[i];
27150     join_tab[i].checked_keys= restore_from->join_tab_checked_keys[i];
27151   }
27152 
27153   memcpy((uchar*) best_positions, (uchar*) restore_from->best_positions,
27154          sizeof(POSITION) * (table_count + 1));
27155   /* Restore SJM nests */
27156   List_iterator<TABLE_LIST> it(select_lex->sj_nests);
27157   TABLE_LIST *tlist;
27158   SJ_MATERIALIZATION_INFO **p_info= restore_from->sj_mat_info;
27159   while ((tlist= it++))
27160   {
27161     tlist->sj_mat_info= *(p_info++);
27162   }
27163 }
27164 
27165 
27166 /**
27167   Reoptimize a query plan taking into account an additional conjunct to the
27168   WHERE clause.
27169 
27170   @param added_where  An extra conjunct to the WHERE clause to reoptimize with
27171   @param join_tables  The set of tables to reoptimize
27172   @param save_to      If != NULL, save here the state of the current query plan,
27173                       otherwise reuse the existing query plan structures.
27174 
27175   @notes
27176   Given a query plan that was already optimized taking into account some WHERE
27177   clause 'C', reoptimize this plan with a new WHERE clause 'C AND added_where'.
27178   The reoptimization works as follows:
27179 
27180   1. Call update_ref_and_keys *only* for the new conditions 'added_where'
27181      that are about to be injected into the query.
27182   2. Expand if necessary the original KEYUSE array JOIN::keyuse to
27183      accommodate the new REF accesses computed for the 'added_where' condition.
27184   3. Add the new KEYUSEs into JOIN::keyuse.
27185   4. Re-sort and re-filter the JOIN::keyuse array with the newly added
27186      KEYUSE elements.
27187 
27188   @retval REOPT_NEW_PLAN  there is a new plan.
27189   @retval REOPT_OLD_PLAN  no new improved plan was produced, use the old one.
27190   @retval REOPT_ERROR     an irrecovarable error occurred during reoptimization.
27191 */
27192 
27193 JOIN::enum_reopt_result
reoptimize(Item * added_where,table_map join_tables,Join_plan_state * save_to)27194 JOIN::reoptimize(Item *added_where, table_map join_tables,
27195                  Join_plan_state *save_to)
27196 {
27197   DYNAMIC_ARRAY added_keyuse;
27198   SARGABLE_PARAM *sargables= 0; /* Used only as a dummy parameter. */
27199   uint org_keyuse_elements;
27200 
27201   /* Re-run the REF optimizer to take into account the new conditions. */
27202   if (update_ref_and_keys(thd, &added_keyuse, join_tab, table_count, added_where,
27203                           ~outer_join, select_lex, &sargables))
27204   {
27205     delete_dynamic(&added_keyuse);
27206     return REOPT_ERROR;
27207   }
27208 
27209   if (!added_keyuse.elements)
27210   {
27211     delete_dynamic(&added_keyuse);
27212     return REOPT_OLD_PLAN;
27213   }
27214 
27215   if (save_to)
27216     save_query_plan(save_to);
27217   else
27218     reset_query_plan();
27219 
27220   if (!keyuse.buffer &&
27221       my_init_dynamic_array(&keyuse, sizeof(KEYUSE), 20, 64,
27222                             MYF(MY_THREAD_SPECIFIC)))
27223   {
27224     delete_dynamic(&added_keyuse);
27225     return REOPT_ERROR;
27226   }
27227 
27228   org_keyuse_elements= save_to ? save_to->keyuse.elements : keyuse.elements;
27229   allocate_dynamic(&keyuse, org_keyuse_elements + added_keyuse.elements);
27230 
27231   /* If needed, add the access methods from the original query plan. */
27232   if (save_to)
27233   {
27234     DBUG_ASSERT(!keyuse.elements);
27235     keyuse.elements= save_to->keyuse.elements;
27236     if (size_t e= keyuse.elements)
27237       memcpy(keyuse.buffer,
27238              save_to->keyuse.buffer, e * keyuse.size_of_element);
27239   }
27240 
27241   /* Add the new access methods to the keyuse array. */
27242   memcpy(keyuse.buffer + keyuse.elements * keyuse.size_of_element,
27243          added_keyuse.buffer,
27244          (size_t) added_keyuse.elements * added_keyuse.size_of_element);
27245   keyuse.elements+= added_keyuse.elements;
27246   /* added_keyuse contents is copied, and it is no longer needed. */
27247   delete_dynamic(&added_keyuse);
27248 
27249   if (sort_and_filter_keyuse(thd, &keyuse, true))
27250     return REOPT_ERROR;
27251   optimize_keyuse(this, &keyuse);
27252 
27253   if (optimize_semijoin_nests(this, join_tables))
27254     return REOPT_ERROR;
27255 
27256   /* Re-run the join optimizer to compute a new query plan. */
27257   if (choose_plan(this, join_tables))
27258     return REOPT_ERROR;
27259 
27260   return REOPT_NEW_PLAN;
27261 }
27262 
27263 
27264 /**
27265   Cache constant expressions in WHERE, HAVING, ON conditions.
27266 */
27267 
cache_const_exprs()27268 void JOIN::cache_const_exprs()
27269 {
27270   bool cache_flag= FALSE;
27271   bool *analyzer_arg= &cache_flag;
27272 
27273   /* No need in cache if all tables are constant. */
27274   if (const_tables == table_count)
27275     return;
27276 
27277   if (conds)
27278     conds->compile(thd, &Item::cache_const_expr_analyzer, (uchar **)&analyzer_arg,
27279                   &Item::cache_const_expr_transformer, (uchar *)&cache_flag);
27280   cache_flag= FALSE;
27281   if (having)
27282     having->compile(thd, &Item::cache_const_expr_analyzer, (uchar **)&analyzer_arg,
27283                     &Item::cache_const_expr_transformer, (uchar *)&cache_flag);
27284 
27285   for (JOIN_TAB *tab= first_depth_first_tab(this); tab;
27286        tab= next_depth_first_tab(this, tab))
27287   {
27288     if (*tab->on_expr_ref)
27289     {
27290       cache_flag= FALSE;
27291       (*tab->on_expr_ref)->compile(thd, &Item::cache_const_expr_analyzer,
27292                                  (uchar **)&analyzer_arg,
27293                                  &Item::cache_const_expr_transformer,
27294                                  (uchar *)&cache_flag);
27295     }
27296   }
27297 }
27298 
27299 
27300 /*
27301   Get a cost of reading rows_limit rows through index keynr.
27302 
27303   @detail
27304    - If there is a quick select, we try to use it.
27305    - if there is a ref(const) access, we try to use it, too.
27306    - quick and ref(const) use different cost formulas, so if both are possible
27307       we should make a cost-based choice.
27308 
27309   @param  tab              JOIN_TAB with table access (is NULL for single-table
27310                            UPDATE/DELETE)
27311   @param  read_time OUT    Cost of reading using quick or ref(const) access.
27312 
27313 
27314   @return
27315     true   There was a possible quick or ref access, its cost is in the OUT
27316            parameters.
27317     false  No quick or ref(const) possible (and so, the caller will attempt
27318            to use a full index scan on this index).
27319 */
27320 
get_range_limit_read_cost(const JOIN_TAB * tab,const TABLE * table,uint keynr,ha_rows rows_limit,double * read_time)27321 static bool get_range_limit_read_cost(const JOIN_TAB *tab,
27322                                       const TABLE *table,
27323                                       uint keynr,
27324                                       ha_rows rows_limit,
27325                                       double *read_time)
27326 {
27327   bool res= false;
27328   /*
27329     We need to adjust the estimates if we had a quick select (or ref(const)) on
27330     index keynr.
27331   */
27332   if (table->quick_keys.is_set(keynr))
27333   {
27334     /*
27335       Start from quick select's rows and cost. These are always cheaper than
27336       full index scan/cost.
27337     */
27338     double best_rows= (double)table->quick_rows[keynr];
27339     double best_cost= (double)table->quick_costs[keynr];
27340 
27341     /*
27342       Check if ref(const) access was possible on this index.
27343     */
27344     if (tab)
27345     {
27346       key_part_map map= 1;
27347       uint kp;
27348       /* Find how many key parts would be used by ref(const) */
27349       for (kp=0; kp < MAX_REF_PARTS; map=map << 1, kp++)
27350       {
27351         if (!(table->const_key_parts[keynr] & map))
27352           break;
27353       }
27354 
27355       if (kp > 0)
27356       {
27357         ha_rows ref_rows;
27358         /*
27359           Two possible cases:
27360           1. ref(const) uses the same #key parts as range access.
27361           2. ref(const) uses fewer key parts, becasue there is a
27362             range_cond(key_part+1).
27363         */
27364         if (kp == table->quick_key_parts[keynr])
27365           ref_rows= table->quick_rows[keynr];
27366         else
27367           ref_rows= (ha_rows) table->key_info[keynr].actual_rec_per_key(kp-1);
27368 
27369         if (ref_rows > 0)
27370         {
27371           double tmp= (double)ref_rows;
27372           /* Reuse the cost formula from best_access_path: */
27373           set_if_smaller(tmp, (double) tab->join->thd->variables.max_seeks_for_key);
27374           if (table->covering_keys.is_set(keynr))
27375             tmp= table->file->keyread_time(keynr, 1, (ha_rows) tmp);
27376           else
27377             tmp= table->file->read_time(keynr, 1,
27378                                         (ha_rows) MY_MIN(tmp,tab->worst_seeks));
27379           if (tmp < best_cost)
27380           {
27381             best_cost= tmp;
27382             best_rows= (double)ref_rows;
27383           }
27384         }
27385       }
27386     }
27387 
27388     if (best_rows > rows_limit)
27389     {
27390       /*
27391         LIMIT clause specifies that we will need to read fewer records than
27392         quick select will return. Assume that quick select's cost is
27393         proportional to the number of records we need to return (e.g. if we
27394         only need 1/3rd of records, it will cost us 1/3rd of quick select's
27395         read time)
27396       */
27397       best_cost *= rows_limit / best_rows;
27398     }
27399     *read_time= best_cost;
27400     res= true;
27401   }
27402   return res;
27403 }
27404 
27405 
27406 /**
27407   Find a cheaper access key than a given @a key
27408 
27409   @param          tab                 NULL or JOIN_TAB of the accessed table
27410   @param          order               Linked list of ORDER BY arguments
27411   @param          table               Table if tab == NULL or tab->table
27412   @param          usable_keys         Key map to find a cheaper key in
27413   @param          ref_key
27414                    0 <= key < MAX_KEY  - Key that is currently used for finding
27415                                          row
27416                    MAX_KEY             - means index_merge is used
27417                    -1                  - means we're currently not using an
27418                                          index to find rows.
27419 
27420   @param          select_limit        LIMIT value
27421   @param [out]    new_key             Key number if success, otherwise undefined
27422   @param [out]    new_key_direction   Return -1 (reverse) or +1 if success,
27423                                       otherwise undefined
27424   @param [out]    new_select_limit    Return adjusted LIMIT
27425   @param [out]    new_used_key_parts  NULL by default, otherwise return number
27426                                       of new_key prefix columns if success
27427                                       or undefined if the function fails
27428   @param [out]  saved_best_key_parts  NULL by default, otherwise preserve the
27429                                       value for further use in QUICK_SELECT_DESC
27430 
27431   @note
27432     This function takes into account table->quick_condition_rows statistic
27433     (that is calculated by the make_join_statistics function).
27434     However, single table procedures such as mysql_update() and mysql_delete()
27435     never call make_join_statistics, so they have to update it manually
27436     (@see get_index_for_order()).
27437 */
27438 
static bool
test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
                         key_map usable_keys,  int ref_key,
                         ha_rows select_limit_arg,
                         int *new_key, int *new_key_direction,
                         ha_rows *new_select_limit, uint *new_used_key_parts,
                         uint *saved_best_key_parts)
{
  DBUG_ENTER("test_if_cheaper_ordering");
  /*
    Check whether there is an index compatible with the given order
    usage of which is cheaper than usage of the ref_key index (ref_key>=0)
    or a table scan.
    It may be the case if ORDER/GROUP BY is used with LIMIT.
  */
  /* Scan limit computed for the best key found so far */
  ha_rows best_select_limit= HA_POS_ERROR;
  /* 'tab' may be NULL (see get_index_for_order()); then there is no join */
  JOIN *join= tab ? tab->join : NULL;
  uint nr;
  /* The set of indexes that are actually examined in the loop below */
  key_map keys;
  /* user_defined_key_parts of the best key found so far */
  uint best_key_parts= 0;
  int best_key_direction= 0;
  ha_rows best_records= 0;
  /* Cost that a candidate ordered index scan has to beat */
  double read_time;
  int best_key= -1;
  bool is_best_covering= FALSE;
  /* Product of records_read of the tables joined after 'tab' */
  double fanout= 1;
  ha_rows table_records= table->stat_records();
  /* TRUE only when there is a join and 'order' is its GROUP BY list */
  bool group= join && join->group && order == join->group_list;
  /* Default estimate; replaced below when 'tab' uses JT_REF on ref_key */
  ha_rows refkey_rows_estimate= table->quick_condition_rows;
  const bool has_limit= (select_limit_arg != HA_POS_ERROR);

  /*
    If not used with LIMIT, only use keys if the whole query can be
    resolved with a key;  This is because filesort() is usually faster than
    retrieving all rows through an index.
  */
  if (select_limit_arg >= table_records)
  {
    keys= *table->file->keys_to_use_for_scanning();
    keys.merge(table->covering_keys);

    /*
      We are adding here also the index specified in FORCE INDEX clause,
      if any.
      This is to allow users to use index in ORDER BY.
    */
    if (table->force_index)
      keys.merge(group ? table->keys_in_use_for_group_by :
                         table->keys_in_use_for_order_by);
    keys.intersect(usable_keys);
  }
  else
    keys= usable_keys;

  if (join)
  {
    /*
      Use the cost recorded in the chosen plan for this table as the
      baseline and collect the fanout of all tables that follow it.
    */
    uint tablenr= (uint)(tab - join->join_tab);
    read_time= join->best_positions[tablenr].read_time;
    for (uint i= tablenr+1; i < join->table_count; i++)
      fanout*= join->best_positions[i].records_read; // fanout is always >= 1
  }
  else
    read_time= table->file->scan_time();  /* No join: baseline is a table scan */

  /*
    TODO: add cost of sorting here.
  */
  read_time += COST_EPS;

  /*
    Calculate the selectivity of the ref_key for REF_ACCESS. For
    RANGE_ACCESS we use table->quick_condition_rows.

    Note: 'tab' is dereferenced here only when ref_key >= 0; the caller in
    this file that passes tab == NULL (get_index_for_order()) passes
    ref_key == -1.
  */
  if (ref_key >= 0 && ref_key != MAX_KEY && tab->type == JT_REF)
  {
    /*
      If ref access uses keypart=const for all its key parts,
      and quick select uses the same # of key parts, then they are equivalent.
      Reuse #rows estimate from quick select as it is more precise.
    */
    if (tab->ref.const_ref_part_map ==
        make_prev_keypart_map(tab->ref.key_parts) &&
        table->quick_keys.is_set(ref_key) &&
        table->quick_key_parts[ref_key] == tab->ref.key_parts)
      refkey_rows_estimate= table->quick_rows[ref_key];
    else
    {
      const KEY *ref_keyinfo= table->key_info + ref_key;
      refkey_rows_estimate= ref_keyinfo->rec_per_key[tab->ref.key_parts - 1];
    }
    /* The estimate is used as a divisor below, so keep it at least 1 */
    set_if_bigger(refkey_rows_estimate, 1);
  }

  /*
    Examine every index of the table. Only indexes that are present in
    'keys' and for which test_if_order_by_key() reports a non-zero
    direction are considered as candidates.
  */
  for (nr=0; nr < table->s->keys ; nr++)
  {
    int direction;
    ha_rows select_limit= select_limit_arg;
    uint used_key_parts= 0;

    if (keys.is_set(nr) &&
        (direction= test_if_order_by_key(join, order, table, nr,
                                         &used_key_parts)))
    {
      /*
        At this point we are sure that ref_key is a non-ordering
        key (where "ordering key" is a key that will return rows
        in the order required by ORDER BY).
      */
      DBUG_ASSERT (ref_key != (int) nr);

      bool is_covering= (table->covering_keys.is_set(nr) ||
                         (table->file->index_flags(nr, 0, 1) &
                          HA_CLUSTERED_INDEX));
      /*
        Don't use an index scan with ORDER BY without limit.
        For GROUP BY without limit always use index scan
        if there is a suitable index.
        Why we hold to this asymmetry hardly can be explained
        rationally. It's easy to demonstrate that using
        temporary table + filesort could be cheaper for grouping
        queries too.
      */
      if (is_covering ||
          select_limit != HA_POS_ERROR ||
          (ref_key < 0 && (group || table->force_index)))
      {
        double rec_per_key;
        double index_scan_time;
        KEY *keyinfo= table->key_info+nr;
        /* Without a LIMIT assume that all rows of the table are needed */
        if (select_limit == HA_POS_ERROR)
          select_limit= table_records;
        if (group)
        {
          /*
            Used_key_parts can be larger than keyinfo->user_defined_key_parts
            when using a secondary index clustered with a primary
            key (e.g. as in Innodb).
            See Bug #28591 for details.
          */
          uint used_index_parts= keyinfo->user_defined_key_parts;
          uint used_pk_parts= 0;
          if (used_key_parts > used_index_parts)
            used_pk_parts= used_key_parts-used_index_parts;
          rec_per_key= used_key_parts ?
	               keyinfo->actual_rec_per_key(used_key_parts-1) : 1;
          /* Take into account the selectivity of the used pk prefix */
          if (used_pk_parts)
	  {
            /*
              'tab' is not NULL here: 'group' can be TRUE only when
              'join' is set, and 'join' is taken from 'tab'.
            */
            KEY *pkinfo=tab->table->key_info+table->s->primary_key;
            /*
              If the values of records per key for the prefixes
              of the primary key are considered unknown we assume
              they are equal to 1.
	    */
            if (used_key_parts == pkinfo->user_defined_key_parts ||
                pkinfo->rec_per_key[0] == 0)
              rec_per_key= 1;
            if (rec_per_key > 1)
	    {
              rec_per_key*= pkinfo->actual_rec_per_key(used_pk_parts-1);
              rec_per_key/= pkinfo->actual_rec_per_key(0);
              /*
                The value of rec_per_key for the extended key has
                to be adjusted accordingly if some components of
                the secondary key are included in the primary key.
	      */
               for(uint i= 1; i < used_pk_parts; i++)
	      {
	        if (pkinfo->key_part[i].field->key_start.is_set(nr))
	        {
                  /*
                    We presume here that for any index rec_per_key[i] != 0
                    if rec_per_key[0] != 0.
	          */
                  DBUG_ASSERT(pkinfo->actual_rec_per_key(i));
                  rec_per_key*= pkinfo->actual_rec_per_key(i-1);
                  rec_per_key/= pkinfo->actual_rec_per_key(i);
                }
	      }
            }
          }
          set_if_bigger(rec_per_key, 1);
          /*
            With a grouping query each group containing on average
            rec_per_key records produces only one row that will
            be included into the result set.
          */
          if (select_limit > table_records/rec_per_key)
            select_limit= table_records;
          else
            select_limit= (ha_rows) (select_limit*rec_per_key);
        } /* group */

        /*
          If tab=tk is not the last joined table tn then to get first
          L records from the result set we can expect to retrieve
          only L/fanout(tk,tn) where fanout(tk,tn) says how many
          rows in the record set on average will match each row tk.
          Usually our estimates for fanouts are too pessimistic.
          So the estimate for L/fanout(tk,tn) will be too optimistic
          and as result we'll choose an index scan when using ref/range
          access + filesort will be cheaper.
        */
        select_limit= (ha_rows) (select_limit < fanout ?
                                 1 : select_limit/fanout);
        /*
          We assume that each of the tested indexes is not correlated
          with ref_key. Thus, to select first N records we have to scan
          N/selectivity(ref_key) index entries.
          selectivity(ref_key) = #scanned_records/#table_records =
          refkey_rows_estimate/table_records.
          In any case we can't select more than #table_records.
          N/(refkey_rows_estimate/table_records) > table_records
          <=> N > refkey_rows_estimate.
         */
        if (select_limit > refkey_rows_estimate)
          select_limit= table_records;
        else
          select_limit= (ha_rows) (select_limit *
                                   (double) table_records /
                                    refkey_rows_estimate);
        /*
          From here on rec_per_key is the estimate for the full user-defined
          key; it overwrites the value computed in the GROUP BY branch above.
        */
        rec_per_key= keyinfo->actual_rec_per_key(keyinfo->user_defined_key_parts-1);
        set_if_bigger(rec_per_key, 1);
        /*
          Here we take into account the fact that rows are
          accessed in sequences rec_per_key records in each.
          Rows in such a sequence are supposed to be ordered
          by rowid/primary key. When reading the data
          in a sequence we'll touch not more pages than the
          table file contains.
          TODO. Use the formula for a disk sweep sequential access
          to calculate the cost of accessing data rows for one
          index entry.
        */
        index_scan_time= select_limit/rec_per_key *
                         MY_MIN(rec_per_key, table->file->scan_time());
        /*
          If get_range_limit_read_cost() reports success, its estimate is
          used instead of index_scan_time whenever it is the lower of the two.
        */
        double range_scan_time;
        if (get_range_limit_read_cost(tab, table, nr, select_limit,
                                       &range_scan_time))
        {
          if (range_scan_time < index_scan_time)
            index_scan_time= range_scan_time;
        }

        /*
          Accept the candidate unconditionally when there is no ref_key and
          the query groups, forces an index or the index is covering;
          otherwise only when it is cheaper than the baseline read_time.
        */
        if ((ref_key < 0 && (group || table->force_index || is_covering)) ||
            index_scan_time < read_time)
        {
          ha_rows quick_records= table_records;
          /*
            Rows that a covering (non hash join) ref_key is expected to read;
            HA_POS_ERROR is used when ref_key does not qualify.
          */
          ha_rows refkey_select_limit= (ref_key >= 0 &&
                                        !is_hash_join_key_no(ref_key) &&
                                        table->covering_keys.is_set(ref_key)) ?
                                        refkey_rows_estimate :
                                        HA_POS_ERROR;
          /*
            Skip the candidate if the current best key is covering and this
            one is not, or if this one is covering but the covering ref_key
            is expected to read fewer rows than this scan.
          */
          if ((is_best_covering && !is_covering) ||
              (is_covering && refkey_select_limit < select_limit))
            continue;
          if (table->quick_keys.is_set(nr))
            quick_records= table->quick_rows[nr];
          /*
            Take the candidate if it is the first one found, if it wins the
            comparison with the current best key (fewer key parts when the
            limit is within both row estimates, fewer quick rows otherwise),
            or if it is covering while the current best key is not.
          */
          if (best_key < 0 ||
              (select_limit <= MY_MIN(quick_records,best_records) ?
               keyinfo->user_defined_key_parts < best_key_parts :
               quick_records < best_records) ||
              (!is_best_covering && is_covering))
          {
            best_key= nr;
            best_key_parts= keyinfo->user_defined_key_parts;
            if (saved_best_key_parts)
              *saved_best_key_parts= used_key_parts;
            best_records= quick_records;
            is_best_covering= is_covering;
            best_key_direction= direction;
            best_select_limit= select_limit;
          }
        }
      }
    }
  }

  /* Nothing found, or the best key is the one that is already used */
  if (best_key < 0 || best_key == ref_key)
    DBUG_RETURN(FALSE);

  /* The output parameters are written only when TRUE is returned */
  *new_key= best_key;
  *new_key_direction= best_key_direction;
  *new_select_limit= has_limit ? best_select_limit : table_records;
  /*
    Note: this reports user_defined_key_parts of the chosen key, whereas
    *saved_best_key_parts gets the number of key parts reported by
    test_if_order_by_key().
  */
  if (new_used_key_parts != NULL)
    *new_used_key_parts= best_key_parts;

  DBUG_RETURN(TRUE);
}
27728 
27729 
27730 /**
27731   Find a key to apply single table UPDATE/DELETE by a given ORDER
27732 
27733   @param       order           Linked list of ORDER BY arguments
27734   @param       table           Table to find a key
27735   @param       select          Pointer to access/update select->quick (if any)
27736   @param       limit           LIMIT clause parameter
27737   @param [out] scanned_limit   How many records we expect to scan
27738                                Valid if *need_sort=FALSE.
27739   @param [out] need_sort       TRUE if filesort needed
27740   @param [out] reverse
    TRUE if the key is reversed against the given ORDER (undefined if key == MAX_KEY)
27742 
27743   @return
27744     - MAX_KEY if no key found                        (need_sort == TRUE)
27745     - MAX_KEY if quick select result order is OK     (need_sort == FALSE)
27746     - key number (either index scan or quick select) (need_sort == FALSE)
27747 
27748   @note
27749     Side effects:
27750     - may deallocate or deallocate and replace select->quick;
27751     - may set table->quick_condition_rows and table->quick_rows[...]
27752       to table->file->stats.records.
27753 */
27754 
get_index_for_order(ORDER * order,TABLE * table,SQL_SELECT * select,ha_rows limit,ha_rows * scanned_limit,bool * need_sort,bool * reverse)27755 uint get_index_for_order(ORDER *order, TABLE *table, SQL_SELECT *select,
27756                          ha_rows limit, ha_rows *scanned_limit,
27757                          bool *need_sort, bool *reverse)
27758 {
27759   if (!order)
27760   {
27761     *need_sort= FALSE;
27762     if (select && select->quick)
27763       return select->quick->index; // index or MAX_KEY, use quick select as is
27764     else
27765       return table->file->key_used_on_scan; // MAX_KEY or index for some engines
27766   }
27767 
27768   if (!is_simple_order(order)) // just to cut further expensive checks
27769   {
27770     *need_sort= TRUE;
27771     return MAX_KEY;
27772   }
27773 
27774   if (select && select->quick)
27775   {
27776     if (select->quick->index == MAX_KEY)
27777     {
27778       *need_sort= TRUE;
27779       return MAX_KEY;
27780     }
27781 
27782     uint used_key_parts;
27783     switch (test_if_order_by_key(NULL, order, table, select->quick->index,
27784                                  &used_key_parts)) {
27785     case 1: // desired order
27786       *need_sort= FALSE;
27787       *scanned_limit= MY_MIN(limit, select->quick->records);
27788       return select->quick->index;
27789     case 0: // unacceptable order
27790       *need_sort= TRUE;
27791       return MAX_KEY;
27792     case -1: // desired order, but opposite direction
27793       {
27794         QUICK_SELECT_I *reverse_quick;
27795         if ((reverse_quick=
27796                select->quick->make_reverse(used_key_parts)))
27797         {
27798           select->set_quick(reverse_quick);
27799           *need_sort= FALSE;
27800           *scanned_limit= MY_MIN(limit, select->quick->records);
27801           return select->quick->index;
27802         }
27803         else
27804         {
27805           *need_sort= TRUE;
27806           return MAX_KEY;
27807         }
27808       }
27809     }
27810     DBUG_ASSERT(0);
27811   }
27812   else if (limit != HA_POS_ERROR)
27813   { // check if some index scan & LIMIT is more efficient than filesort
27814 
27815     /*
27816       Update quick_condition_rows since single table UPDATE/DELETE procedures
27817       don't call make_join_statistics() and leave this variable uninitialized.
27818     */
27819     table->quick_condition_rows= table->stat_records();
27820 
27821     int key, direction;
27822     if (test_if_cheaper_ordering(NULL, order, table,
27823                                  table->keys_in_use_for_order_by, -1,
27824                                  limit,
27825                                  &key, &direction, &limit) &&
27826         !is_key_used(table, key, table->write_set))
27827     {
27828       *need_sort= FALSE;
27829       *scanned_limit= limit;
27830       *reverse= (direction < 0);
27831       return key;
27832     }
27833   }
27834   *need_sort= TRUE;
27835   return MAX_KEY;
27836 }
27837 
27838 
27839 /*
27840   Count how many times the specified conditions are true for first rows_to_read
27841   rows of the table.
27842 
27843   @param thd                  Thread handle
27844   @param rows_to_read         How many rows to sample
27845   @param table                Table to use
  @param conds         INOUT  List of conditions and counters for them

  @return Number of rows we've checked. It can be equal or less than rows_to_read.
27849           0 is returned for error or when the table had no rows.
27850 */
27851 
check_selectivity(THD * thd,ulong rows_to_read,TABLE * table,List<COND_STATISTIC> * conds)27852 ulong check_selectivity(THD *thd,
27853                         ulong rows_to_read,
27854                         TABLE *table,
27855                         List<COND_STATISTIC> *conds)
27856 {
27857   ulong count= 0;
27858   COND_STATISTIC *cond;
27859   List_iterator_fast<COND_STATISTIC> it(*conds);
27860   handler *file= table->file;
27861   uchar *record= table->record[0];
27862   int error= 0;
27863   DBUG_ENTER("check_selectivity");
27864 
27865   DBUG_ASSERT(rows_to_read > 0);
27866   while ((cond= it++))
27867   {
27868     DBUG_ASSERT(cond->cond);
27869     DBUG_ASSERT(cond->cond->used_tables() == table->map);
27870     cond->positive= 0;
27871   }
27872   it.rewind();
27873 
27874   if (unlikely(file->ha_rnd_init_with_error(1)))
27875     DBUG_RETURN(0);
27876   do
27877   {
27878     error= file->ha_rnd_next(record);
27879 
27880     if (unlikely(thd->killed))
27881     {
27882       thd->send_kill_message();
27883       count= 0;
27884       goto err;
27885     }
27886     if (unlikely(error))
27887     {
27888       if (error == HA_ERR_END_OF_FILE)
27889 	break;
27890       goto err;
27891     }
27892 
27893     count++;
27894     while ((cond= it++))
27895     {
27896       if (cond->cond->val_bool())
27897         cond->positive++;
27898     }
27899     it.rewind();
27900 
27901   } while (count < rows_to_read);
27902 
27903   file->ha_rnd_end();
27904   DBUG_RETURN(count);
27905 
27906 err:
27907   DBUG_PRINT("error", ("error %d", error));
27908   file->ha_rnd_end();
27909   DBUG_RETURN(0);
27910 }
27911 
27912 /****************************************************************************
27913   AGGR_OP implementation
27914 ****************************************************************************/
27915 
27916 /**
27917   @brief Instantiate tmp table for aggregation and start index scan if needed
27918   @todo Tmp table always would be created, even for empty result. Extend
27919         executor to avoid tmp table creation when no rows were written
27920         into tmp table.
27921   @return
27922     true  error
27923     false ok
27924 */
27925 
27926 bool
prepare_tmp_table()27927 AGGR_OP::prepare_tmp_table()
27928 {
27929   TABLE *table= join_tab->table;
27930   JOIN *join= join_tab->join;
27931   int rc= 0;
27932 
27933   if (!join_tab->table->is_created())
27934   {
27935     if (instantiate_tmp_table(table, join_tab->tmp_table_param->keyinfo,
27936                               join_tab->tmp_table_param->start_recinfo,
27937                               &join_tab->tmp_table_param->recinfo,
27938                               join->select_options))
27939       return true;
27940     (void) table->file->extra(HA_EXTRA_WRITE_CACHE);
27941   }
27942   /* If it wasn't already, start index scan for grouping using table index. */
27943   if (!table->file->inited && table->group &&
27944       join_tab->tmp_table_param->sum_func_count && table->s->keys)
27945     rc= table->file->ha_index_init(0, 0);
27946   else
27947   {
27948     /* Start index scan in scanning mode */
27949     rc= table->file->ha_rnd_init(true);
27950   }
27951   if (rc)
27952   {
27953     table->file->print_error(rc, MYF(0));
27954     return true;
27955   }
27956   return false;
27957 }
27958 
27959 
27960 /**
27961   @brief Prepare table if necessary and call write_func to save record
27962 
27963   @param end_of_records  the end_of_record signal to pass to the writer
27964 
27965   @return return one of enum_nested_loop_state.
27966 */
27967 
27968 enum_nested_loop_state
put_record(bool end_of_records)27969 AGGR_OP::put_record(bool end_of_records)
27970 {
27971   // Lasy tmp table creation/initialization
27972   if (!join_tab->table->file->inited)
27973     if (prepare_tmp_table())
27974       return NESTED_LOOP_ERROR;
27975   enum_nested_loop_state rc= (*write_func)(join_tab->join, join_tab,
27976                                            end_of_records);
27977   return rc;
27978 }
27979 
27980 
27981 /**
27982   @brief Finish rnd/index scan after accumulating records, switch ref_array,
27983          and send accumulated records further.
27984   @return return one of enum_nested_loop_state.
27985 */
27986 
27987 enum_nested_loop_state
end_send()27988 AGGR_OP::end_send()
27989 {
27990   enum_nested_loop_state rc= NESTED_LOOP_OK;
27991   TABLE *table= join_tab->table;
27992   JOIN *join= join_tab->join;
27993 
27994   // All records were stored, send them further
27995   int tmp, new_errno= 0;
27996 
27997   if ((rc= put_record(true)) < NESTED_LOOP_OK)
27998     return rc;
27999 
28000   if ((tmp= table->file->extra(HA_EXTRA_NO_CACHE)))
28001   {
28002     DBUG_PRINT("error",("extra(HA_EXTRA_NO_CACHE) failed"));
28003     new_errno= tmp;
28004   }
28005   if ((tmp= table->file->ha_index_or_rnd_end()))
28006   {
28007     DBUG_PRINT("error",("ha_index_or_rnd_end() failed"));
28008     new_errno= tmp;
28009   }
28010   if (new_errno)
28011   {
28012     table->file->print_error(new_errno,MYF(0));
28013     return NESTED_LOOP_ERROR;
28014   }
28015 
28016   // Update ref array
28017   join_tab->join->set_items_ref_array(*join_tab->ref_array);
28018   bool keep_last_filesort_result = join_tab->filesort ? false : true;
28019   if (join_tab->window_funcs_step)
28020   {
28021     if (join_tab->window_funcs_step->exec(join, keep_last_filesort_result))
28022       return NESTED_LOOP_ERROR;
28023   }
28024 
28025   table->reginfo.lock_type= TL_UNLOCK;
28026 
28027   bool in_first_read= true;
28028 
28029   /*
28030      Reset the counter before copying rows from internal temporary table to
28031      INSERT table.
28032   */
28033   join_tab->join->thd->get_stmt_da()->reset_current_row_for_warning();
28034   while (rc == NESTED_LOOP_OK)
28035   {
28036     int error;
28037     if (in_first_read)
28038     {
28039       in_first_read= false;
28040       error= join_init_read_record(join_tab);
28041     }
28042     else
28043       error= join_tab->read_record.read_record();
28044 
28045     if (unlikely(error > 0 || (join->thd->is_error())))   // Fatal error
28046       rc= NESTED_LOOP_ERROR;
28047     else if (error < 0)
28048       break;
28049     else if (unlikely(join->thd->killed))		  // Aborted by user
28050     {
28051       join->thd->send_kill_message();
28052       rc= NESTED_LOOP_KILLED;
28053     }
28054     else
28055     {
28056       rc= evaluate_join_record(join, join_tab, 0);
28057     }
28058   }
28059 
28060   if (keep_last_filesort_result)
28061   {
28062     delete join_tab->filesort_result;
28063     join_tab->filesort_result= NULL;
28064   }
28065 
28066   // Finish rnd scn after sending records
28067   if (join_tab->table->file->inited)
28068     join_tab->table->file->ha_rnd_end();
28069 
28070   return rc;
28071 }
28072 
28073 
28074 /**
28075   @brief
28076   Remove marked top conjuncts of a condition
28077 
28078   @param thd    The thread handle
28079   @param cond   The condition which subformulas are to be removed
28080 
28081   @details
    The function removes all top conjuncts marked with the flag
    FULL_EXTRACTION_FL from the condition 'cond'. The resulting
    formula is returned as the result of the function.
    If 'cond' is marked with such a flag the function returns 0.
    The function clears the extraction flags for the removed
    formulas.
28088 
28089    @retval
28090      condition without removed subformulas
28091      0 if the whole 'cond' is removed
28092 */
28093 
remove_pushed_top_conjuncts(THD * thd,Item * cond)28094 Item *remove_pushed_top_conjuncts(THD *thd, Item *cond)
28095 {
28096   if (cond->get_extraction_flag() == FULL_EXTRACTION_FL)
28097   {
28098     cond->clear_extraction_flag();
28099     return 0;
28100   }
28101   if (cond->type() == Item::COND_ITEM)
28102   {
28103     if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
28104     {
28105       List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
28106       Item *item;
28107       while ((item= li++))
28108       {
28109 	if (item->get_extraction_flag() == FULL_EXTRACTION_FL)
28110 	{
28111 	  item->clear_extraction_flag();
28112 	  li.remove();
28113 	}
28114       }
28115       switch (((Item_cond*) cond)->argument_list()->elements)
28116       {
28117       case 0:
28118 	return 0;
28119       case 1:
28120 	return ((Item_cond*) cond)->argument_list()->head();
28121       default:
28122 	return cond;
28123       }
28124     }
28125   }
28126   return cond;
28127 }
28128 
28129 /*
  There are several cases in which we shortcut the join optimization process
  as we conclude that the join would be a degenerate one, including:
    1) IMPOSSIBLE WHERE
    2) MIN/MAX optimization (@see opt_sum_query)
    3) EMPTY CONST TABLE
  If a window function is present in any of the above cases then to get the
  result of the window function, we need to execute it. So we need to
  create a temporary table for its execution. Here we need to keep in mind
  that aggregate functions and non-aggregate functions need not be executed.
28139 
28140 */
28141 
28142 
handle_implicit_grouping_with_window_funcs()28143 void JOIN::handle_implicit_grouping_with_window_funcs()
28144 {
28145   if (select_lex->have_window_funcs() && send_row_on_empty_set())
28146   {
28147     const_tables= top_join_tab_count= table_count= 0;
28148   }
28149 }
28150 
28151 
28152 /*
28153   @brief
28154     Perform a partial cleanup for the JOIN_TAB structure
28155 
28156   @note
28157     this is used to cleanup resources for the re-execution of correlated
28158     subqueries.
28159 */
partial_cleanup()28160 void JOIN_TAB::partial_cleanup()
28161 {
28162   if (!table)
28163     return;
28164 
28165   if (table->is_created())
28166   {
28167     table->file->ha_index_or_rnd_end();
28168     DBUG_PRINT("info", ("close index: %s.%s  alias: %s",
28169                table->s->db.str,
28170                table->s->table_name.str,
28171                table->alias.c_ptr()));
28172     if (aggr)
28173     {
28174       int tmp= 0;
28175       if ((tmp= table->file->extra(HA_EXTRA_NO_CACHE)))
28176         table->file->print_error(tmp, MYF(0));
28177     }
28178   }
28179   delete filesort_result;
28180   filesort_result= NULL;
28181   free_cache(&read_record);
28182 }
28183 
28184 
28185 /**
28186   @} (end of group Query_Optimizer)
28187 */
28188