1 /* Copyright (c) 2000, 2021, Oracle and/or its affiliates.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
22 
23 /**
24   @file
25 
26   @brief
27   Implementation of name resolution stage
28 
29 
30   @defgroup Query_Resolver  Query Resolver
31   @{
32 */
33 
34 #include "sql_select.h"
35 #include "sql_resolver.h"
36 #include "sql_optimizer.h"
37 #include "opt_trace.h"
38 #include "sql_base.h"
39 #include "auth_common.h"
40 #include "opt_explain_format.h"
41 #include "sql_test.h"            // print_where
42 #include "aggregate_check.h"
43 #include "template_utils.h"
44 
45 static void propagate_nullability(List<TABLE_LIST> *tables, bool nullable);
46 
47 static const Item::enum_walk walk_subquery=
48   Item::enum_walk(Item::WALK_POSTFIX | Item::WALK_SUBQUERY);
49 
50 uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list,
51                                    uint first_unused);
52 
53 
54 /**
55   Prepare query block for optimization.
56 
57   Resolve table and column information.
58   Resolve all expressions (item trees), ie WHERE clause, join conditions,
59   GROUP BY clause, HAVING clause, ORDER BY clause, LIMIT clause.
60   Prepare all subqueries recursively as part of resolving the expressions.
61   Apply permanent transformations to the abstract syntax tree, such as
62   semi-join transformation, derived table transformation, elimination of
63   constant values and redundant clauses (e.g ORDER BY, GROUP BY).
64 
65   @param thd    thread handler
66 
67   @returns false if success, true if error
68 
69   @note on privilege checking for SELECT query that possibly contains view
70         or derived table references:
71 
72    - When this function is called, it is assumed that the precheck() function
73      has been called. precheck() ensures that the user has some SELECT
74      privileges to the tables involved in the query. When resolving views
75      it has also been established that the user has some privileges for them.
76      To prepare a view for privilege checking, it is also needed to call
77      check_view_privileges() after views have been merged into the query.
78      This is not necessary for unnamed derived tables since it has already
79      been established that we have SELECT privileges for the underlying tables
80      by the precheck functions. (precheck() checks a query without resolved
81      views, ie. before tables are opened, so underlying tables of views
82      are not yet available).
83 
84    - When a query block is resolved, always ensure that the user has SELECT
85      privileges to the columns referenced in the WHERE clause, the join
86      conditions, the GROUP BY clause, the HAVING clause and the ORDER BY clause.
87 
88    - When resolving the outer-most query block, ensure that the user also has
89      SELECT privileges to the columns in the selected expressions.
90 
91    - When setting up a derived table or view for materialization, ensure that
92      the user has SELECT privileges to the columns in the selected expressions
93 
94    - Column privileges are normally checked by Item_field::fix_fields().
95      Exceptions are select list of derived tables/views which are checked
96      in TABLE_LIST::setup_materialized_derived(), and natural/using join
97      conditions that are checked in mark_common_columns().
98 
99    - As far as INSERT, UPDATE and DELETE statements have the same expressions
100      as a SELECT statement, this note applies to those statements as well.
101 */
prepare(THD * thd)102 bool SELECT_LEX::prepare(THD *thd)
103 {
104   DBUG_ENTER("SELECT_LEX::prepare");
105 
106   // We may do subquery transformation, or Item substitution:
107   Prepare_error_tracker tracker(thd);
108 
109   assert(this == thd->lex->current_select());
110   assert(join == NULL);
111 
112   SELECT_LEX_UNIT *const unit= master_unit();
113 
114   if (top_join_list.elements > 0)
115     propagate_nullability(&top_join_list, false);
116 
117   is_item_list_lookup= true;
118 
119   /*
120     Determine whether immediate derived tables can be merged:
121       - DTs belonging to outermost query block: always
122       - DTs belonging to first level subqueries: Yes if inside SELECT statement,
123         no otherwise (including UPDATE and DELETE).
124         This is required to support a workaround for allowing subqueries
125         containing the same table as is target for delete or update,
126         by forcing a materialization of the subquery.
127       - All other cases inherit status of parent query block.
128   */
129   allow_merge_derived=
130     outer_select() == NULL ||
131     master_unit()->item == NULL ||
132     (outer_select()->outer_select() == NULL ?
133       parent_lex->sql_command == SQLCOM_SELECT :
134       outer_select()->allow_merge_derived);
135 
136   Opt_trace_context * const trace= &thd->opt_trace;
137   Opt_trace_object trace_wrapper(trace);
138   Opt_trace_object trace_prepare(trace, "join_preparation");
139   trace_prepare.add_select_number(select_number);
140   Opt_trace_array trace_steps(trace, "steps");
141 
142   // Initially, "all_fields" is the select list
143   all_fields= fields_list;
144 
145   /* Check that all tables, fields, conds and order are ok */
146 
147   if (!(active_options() & OPTION_SETUP_TABLES_DONE))
148   {
149     if (setup_tables(thd, get_table_list(), false))
150       DBUG_RETURN(true);
151 
152     if (derived_table_count && resolve_derived(thd, true))
153       DBUG_RETURN(true);
154 
155     // Wait with privilege checking until all derived tables are resolved.
156     if (!thd->derived_tables_processing &&
157         check_view_privileges(thd, SELECT_ACL, SELECT_ACL))
158       DBUG_RETURN(true);
159   }
160 
161   // Precompute and store the row types of NATURAL/USING joins.
162   if (leaf_table_count >= 2 &&
163       setup_natural_join_row_types(thd, join_list, &context))
164     DBUG_RETURN(true);
165 
166   Mem_root_array<Item_exists_subselect *, true>
167     sj_candidates_local(thd->mem_root);
168   sj_candidates= &sj_candidates_local;
169 
170   /*
171     Item and Item_field CTORs will both increment some counters
172     in current_select(), based on the current parsing context.
173     We are not parsing anymore: any new Items created now are due to
174     query rewriting, so stop incrementing counters.
175    */
176   assert(parsing_place == CTX_NONE);
177   parsing_place= CTX_NONE;
178 
179   resolve_place= RESOLVE_SELECT_LIST;
180 
181   /*
182     Setup the expressions in the SELECT list. Wait with privilege checking
183     until all derived tables are resolved, except do privilege checking for
184     subqueries inside a derived table.
185   */
186   const bool check_privs= !thd->derived_tables_processing ||
187                           master_unit()->item != NULL;
188   thd->mark_used_columns= check_privs ? MARK_COLUMNS_READ : MARK_COLUMNS_NONE;
189   ulonglong want_privilege_saved= thd->want_privilege;
190   thd->want_privilege= check_privs ? SELECT_ACL : 0;
191 
192   if (with_wild && setup_wild(thd))
193     DBUG_RETURN(true);
194   if (setup_ref_array(thd))
195     DBUG_RETURN(true); /* purecov: inspected */
196 
197   if (setup_fields(thd, ref_ptrs, fields_list, thd->want_privilege,
198                    &all_fields, true, false))
199     DBUG_RETURN(true);
200 
201   resolve_place= RESOLVE_NONE;
202 
203   const nesting_map save_allow_sum_func= thd->lex->allow_sum_func;
204 
205   // Do not allow local set functions for join conditions, WHERE and GROUP BY
206   thd->lex->allow_sum_func&= ~((nesting_map)1 << nest_level);
207 
208   thd->mark_used_columns= MARK_COLUMNS_READ;
209   thd->want_privilege= SELECT_ACL;
210 
211   // Set up join conditions and WHERE clause
212   if (setup_conds(thd))
213     DBUG_RETURN(true);
214 
215   // Set up the GROUP BY clause
216   int all_fields_count= all_fields.elements;
217   if (group_list.elements && setup_group(thd))
218     DBUG_RETURN(true);
219   hidden_group_field_count= all_fields.elements - all_fields_count;
220 
221   // Allow local set functions in HAVING and ORDER BY
222   thd->lex->allow_sum_func|= (nesting_map)1 << nest_level;
223 
224   // Setup the HAVING clause
225   if (m_having_cond)
226   {
227     thd->where="having clause";
228     having_fix_field= true;
229     resolve_place= RESOLVE_HAVING;
230     if (!m_having_cond->fixed &&
231         (m_having_cond->fix_fields(thd, &m_having_cond) ||
232          m_having_cond->check_cols(1)))
233       DBUG_RETURN(true);
234 
235     having_fix_field= false;
236     resolve_place= RESOLVE_NONE;
237   }
238 
239   // Set up the ORDER BY clause
240   all_fields_count= all_fields.elements;
241   if (order_list.elements)
242   {
243     if (setup_order(thd, ref_ptrs, get_table_list(), fields_list, all_fields,
244                     order_list.first))
245       DBUG_RETURN(true);
246     hidden_order_field_count= all_fields.elements - all_fields_count;
247    }
248 
249   // Query block is completely resolved, restore set function allowance
250   thd->lex->allow_sum_func= save_allow_sum_func;
251 
252   thd->want_privilege= want_privilege_saved;
253   /*
254     Permanently remove redundant parts from the query if
255       1) This is a subquery
256       2) This is the first time this query is prepared (since the
257          transformation is permanent)
258       3) Not normalizing a view. Removal should take place when a
259          query involving a view is optimized, not when the view
260          is created
261   */
262   if (unit->item &&                                        // 1)
263       first_execution &&                                   // 2)
264       !(thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW)) // 3)
265   {
266     remove_redundant_subquery_clauses(thd, hidden_group_field_count);
267   }
268 
269   if (order_list.elements &&
270       setup_order_final(thd))
271     DBUG_RETURN(true);     /* purecov: inspected */
272 
273   if (is_distinct() &&
274       is_grouped() &&
275       hidden_group_field_count == 0 &&
276       olap == UNSPECIFIED_OLAP_TYPE)
277   {
278     /*
279       All GROUP expressions are in SELECT list, so resulting rows are distinct.
280       ROLLUP is not specified, so adds no row. So all rows in the result set
281       are distinct, DISTINCT is useless.
282       @todo could remove DISTINCT if ROLLUP were specified and all GROUP
283       expressions were non-nullable, because ROLLUP adds only NULL values.
284       Currently, ROLLUP+DISTINCT is rejected because executor cannot handle
285       it in all cases.
286     */
287     remove_base_options(SELECT_DISTINCT);
288   }
289 
290   /*
291     Printing the expanded query should happen here and not elsewhere, because
292     when a view is merged (when the view is opened in open_tables()), the
293     parent query's select_lex does not yet contain a correct WHERE clause (it
294     misses the view's merged WHERE clause). This is corrected only just above,
295     in TABLE_LIST::prep_where(), called by
296     setup_without_group()->setup_conds().
297     We also have to wait for fix_fields() on HAVING, above.
298     At this stage, we also have properly set up Item_ref-s.
299   */
300   {
301     Opt_trace_object trace_wrapper(trace);
302     opt_trace_print_expanded_query(thd, this, &trace_wrapper);
303   }
304 
305   /*
306     When normalizing a view (like when writing a view's body to the FRM),
307     subquery transformations don't apply (if they did, IN->EXISTS could not be
308     undone in favour of materialization, when optimizing a later statement
309     using the view)
310   */
311   if (unit->item &&                      // This is a subquery
312       this != unit->fake_select_lex &&   // A real query block
313                                          // Not normalizing a view
314       !(thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW))
315   {
316     // Query block represents a subquery within an IN/ANY/ALL/EXISTS predicate
317     if (resolve_subquery(thd))
318       DBUG_RETURN(true);
319   }
320 
321   if (m_having_cond && m_having_cond->with_sum_func)
322     m_having_cond->split_sum_func2(thd, ref_ptrs, all_fields, &m_having_cond,
323                                    true);
324   if (inner_sum_func_list)
325   {
326     Item_sum *end=inner_sum_func_list;
327     Item_sum *item_sum= end;
328     do
329     {
330       item_sum= item_sum->next;
331       item_sum->split_sum_func2(thd, ref_ptrs,
332                                 all_fields, item_sum->ref_by, false);
333     } while (item_sum != end);
334   }
335 
336   if (inner_refs_list.elements && fix_inner_refs(thd))
337     DBUG_RETURN(true);  /* purecov: inspected */
338 
339   if (group_list.elements)
340   {
341     /*
342       Because HEAP tables can't index BIT fields we need to use an
343       additional hidden field for grouping because later it will be
344       converted to a LONG field. Original field will remain of the
345       BIT type and will be returned to a client.
346     */
347     for (ORDER *ord= group_list.first; ord; ord= ord->next)
348     {
349       if ((*ord->item)->type() == Item::FIELD_ITEM &&
350           (*ord->item)->field_type() == MYSQL_TYPE_BIT)
351       {
352         Item_field *field= new Item_field(thd, *(Item_field**)ord->item);
353         ord->item= add_hidden_item(field);
354       }
355     }
356   }
357 
358   if (setup_ftfuncs(this)) /* should be after having->fix_fields */
359     DBUG_RETURN(true);
360 
361   if (query_result() && query_result()->prepare(fields_list, unit))
362     DBUG_RETURN(true);
363 
364   if (olap == ROLLUP_TYPE && resolve_rollup(thd))
365     DBUG_RETURN(true); /* purecov: inspected */
366 
367   if (flatten_subqueries())
368     DBUG_RETURN(true);
369 
370   sj_candidates= NULL;
371 
372   /*
373     When reaching the top-most query block, or the next-to-top query block for
374     the SQL command SET and for SP instructions (indicated with SQLCOM_END),
375     apply local transformations to this query block and all underlying query
376     blocks.
377   */
378   if (outer_select() == NULL ||
379       ((parent_lex->sql_command == SQLCOM_SET_OPTION ||
380        parent_lex->sql_command == SQLCOM_END) &&
381        outer_select()->outer_select() == NULL))
382   {
383     /*
384       This code is invoked in the following cases:
385       - if this is an outer-most query block of a SELECT or multi-table
386         UPDATE/DELETE statement. Notice that for a UNION, this applies to
387         all query blocks. It also applies to a fake_select_lex object.
388       - if this is one of highest-level subqueries, if the statement is
389         something else; like subq-i in:
390           UPDATE t1 SET col1=(subq-1), col2=(subq-2);
391       - If this is a subquery in a SET command
392         @todo: Refactor SET so that this is not needed.
393 
394       Local transforms are applied after query block merging.
395       This means that we avoid unnecessary invocations, as local transforms
396       would otherwise have been performed first before query block merging and
397       then another time after query block merging.
398       Thus, apply_local_transforms() may run only after the top query
399       is finished with query block merging. That's why
400       apply_local_transforms() is initiated only by the top query, and then
401       recurses into subqueries.
402     */
403     if (apply_local_transforms(thd, true))
404       DBUG_RETURN(true);
405   }
406 
407   assert(!thd->is_error());
408   DBUG_RETURN(false);
409 }
410 
411 
412 /**
413   Apply local transformations, such as query block merging.
414   Also perform partition pruning, which is most effective after transformations
415   have been done.
416 
417   @param thd      thread handler
418   @param prune    if true, then prune partitions based on const conditions
419 
420   @returns false if success, true if error
421 
422   Since this is called after flattening of query blocks, call this function
423   while traversing the query block hierarchy top-down.
424 */
425 
apply_local_transforms(THD * thd,bool prune)426 bool SELECT_LEX::apply_local_transforms(THD *thd, bool prune)
427 {
428   DBUG_ENTER("SELECT_LEX::apply_local_transforms");
429 
430   /*
431     If query block contains one or more merged derived tables/views,
432     walk through lists of columns in select lists and remove unused columns.
433   */
434   if (derived_table_count &&
435       first_execution &&
436       !(thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW))
437     delete_unused_merged_columns(&top_join_list);
438 
439   for (SELECT_LEX_UNIT *unit= first_inner_unit();
440        unit;
441        unit= unit->next_unit())
442   {
443     for (SELECT_LEX *sl= unit->first_select();
444          sl;
445          sl= sl->next_select())
446     {
447       // Prune all subqueries, regardless of passed argument
448       if (sl->apply_local_transforms(thd, true))
449         DBUG_RETURN(true);
450     }
451     if (unit->fake_select_lex &&
452         unit->fake_select_lex->apply_local_transforms(thd, false))
453       DBUG_RETURN(true);
454   }
455 
456   if (first_execution &&
457       !(thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW))
458   {
459     /*
460       The following code will allocate the new items in a permanent
461       MEMROOT for prepared statements and stored procedures.
462     */
463     Prepared_stmt_arena_holder ps_arena_holder(thd);
464     // Convert all outer joins to inner joins if possible
465     if (simplify_joins(thd, &top_join_list, true, false, &m_where_cond))
466       DBUG_RETURN(true);
467     if (record_join_nest_info(&top_join_list))
468       DBUG_RETURN(true);
469     build_bitmap_for_nested_joins(&top_join_list, 0);
470 
471     /*
472       Here are the reasons why we do the following check here (i.e. late).
473       * setup_fields () may have done split_sum_func () on aggregate items of
474       the SELECT list, so for reliable comparison of the ORDER BY list with
475       the SELECT list, we need to wait until split_sum_func() is done with
476       the ORDER BY list.
477       * we get resolved expressions "most of the time", which is always a good
478       thing. Some outer references may not be resolved, though.
479       * we need nested_join::used_tables, and this member is set in
480       simplify_joins()
481       * simplify_joins() does outer-join-to-inner conversion, which increases
482       opportunities for functional dependencies (weak-to-strong, which is
483       unusable, becomes strong-to-strong).
484       * check_only_full_group_by() is dependent on processing done by
485       simplify_joins() (for example it uses the value of
486       SELECT_LEX::outer_join).
487 
488       The drawback is that the checks are after resolve_subquery(), so can
489       meet strange "internally added" items.
490 
491       Note that when we are creating a view, simplify_joins() doesn't run so
492       check_only_full_group_by() cannot run, any error will be raised only
493       when the view is later used (SELECTed...)
494     */
495     if ((is_distinct() || is_grouped()) &&
496         (thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY) &&
497         check_only_full_group_by(thd))
498       DBUG_RETURN(true);
499   }
500 
501   fix_prepare_information(thd);
502 
503   /*
504     Prune partitions for all query blocks after query block merging, if
505     pruning is wanted.
506   */
507   if (partitioned_table_count && prune)
508   {
509     for (TABLE_LIST *tbl= leaf_tables; tbl; tbl= tbl->next_leaf)
510     {
511       /*
512         This will only prune constant conditions, which will be used for
513         lock pruning.
514       */
515       if (prune_partitions(thd, tbl->table,
516                            tbl->join_cond() ? tbl->join_cond() :
517                                               m_where_cond))
518         DBUG_RETURN(true); /* purecov: inspected */
519     }
520   }
521 
522   DBUG_RETURN(false);
523 }
524 
525 
526 /**
527   Check if the subquery predicate can be executed via materialization.
528 
529   @param predicate IN subquery predicate
530   @param thd       THD
531   @param select_lex SELECT_LEX of the subquery
532   @param outer      Parent SELECT_LEX (outer to subquery)
533 
534   @return TRUE if subquery allows materialization, FALSE otherwise.
535 */
536 
subquery_allows_materialization(Item_in_subselect * predicate,THD * thd,SELECT_LEX * select_lex,const SELECT_LEX * outer)537 bool subquery_allows_materialization(Item_in_subselect *predicate,
538                                      THD *thd,
539                                      SELECT_LEX *select_lex,
540                                      const SELECT_LEX *outer)
541 {
542   const uint elements= predicate->unit->first_select()->item_list.elements;
543   DBUG_ENTER("subquery_allows_materialization");
544   assert(elements >= 1);
545   assert(predicate->left_expr->cols() == elements);
546 
547   OPT_TRACE_TRANSFORM(&thd->opt_trace, trace_wrapper, trace_mat,
548                       select_lex->select_number,
549                       "IN (SELECT)", "materialization");
550 
551   const char *cause= NULL;
552   if (predicate->substype() != Item_subselect::IN_SUBS)
553   {
554     // Subq-mat cannot handle 'outer_expr > {ANY|ALL}(subq)'...
555     cause= "not an IN predicate";
556   }
557   else if (select_lex->is_part_of_union())
558   {
559     // Subquery must be a single query specification clause (not a UNION)
560     cause= "in UNION";
561   }
562   else if (!select_lex->master_unit()->first_select()->leaf_tables)
563   {
564     // Subquery has no tables, hence no point in materializing.
565     cause= "no inner tables";
566   }
567   else if (!outer->join)
568   {
569     /*
570       Maybe this is a subquery of a single table UPDATE/DELETE (TODO:
571       handle this by switching to multi-table UPDATE/DELETE).
572     */
573     cause= "parent query has no JOIN";
574   }
575   else if (!outer->leaf_tables)
576   {
577     // The upper query is SELECT ... FROM DUAL. No gain in materializing.
578     cause= "no tables in outer query";
579   }
580   else if (predicate->dependent_before_in2exists())
581   {
582     /*
583       Subquery should not be correlated; the correlation due to predicates
584       injected by IN->EXISTS does not count as we will remove them if we
585       choose materialization.
586 
587       TODO:
588       This is an overly restrictive condition. It can be extended to:
589          (Subquery is non-correlated ||
590           Subquery is correlated to any query outer to IN predicate ||
591           (Subquery is correlated to the immediate outer query &&
592            Subquery !contains {GROUP BY, ORDER BY [LIMIT],
593            aggregate functions}) && subquery predicate is not under "NOT IN"))
594     */
595     cause= "correlated";
596   }
597   else
598   {
599     /*
600       Check that involved expression types allow materialization.
601       This is a temporary fix for BUG#36752; see bug report for
602       description of restrictions we need to put on the compared expressions.
603     */
604     assert(predicate->left_expr->fixed);
605     // @see comment in Item_subselect::element_index()
606     bool has_nullables= predicate->left_expr->maybe_null;
607 
608     List_iterator<Item> it(predicate->unit->first_select()->item_list);
609     for (uint i= 0; i < elements; i++)
610     {
611       Item * const inner= it++;
612       Item * const outer= predicate->left_expr->element_index(i);
613       if (!types_allow_materialization(outer, inner))
614       {
615         cause= "type mismatch";
616         break;
617       }
618       if (inner->is_blob_field())                 // 6
619       {
620         cause= "inner blob";
621         break;
622       }
623       has_nullables|= inner->maybe_null;
624     }
625 
626     if (!cause)
627     {
628       trace_mat.add("has_nullable_expressions", has_nullables);
629       /*
630         Subquery materialization cannot handle NULLs partial matching
631         properly, yet. If the outer or inner values are NULL, the
632         subselect_hash_sj_engine may reply FALSE when it should reply UNKNOWN.
633         So, we must limit it to those three cases:
634         - when FALSE and UNKNOWN are equivalent answers. I.e. this is a a
635         top-level predicate (this implies it is not negated).
636         - when outer and inner values cannot be NULL.
637         - when there is a single inner column (because for this we have a
638         limited implementation of NULLs partial matching).
639       */
640       const bool is_top_level= predicate->is_top_level_item();
641       trace_mat.add("treat_UNKNOWN_as_FALSE", is_top_level);
642 
643       if (!is_top_level && has_nullables && (elements > 1))
644         cause= "cannot_handle_partial_matches";
645       else
646       {
647         trace_mat.add("possible", true);
648         DBUG_RETURN(TRUE);
649       }
650     }
651   }
652   assert(cause != NULL);
653   trace_mat.add("possible", false).add_alnum("cause", cause);
654   DBUG_RETURN(false);
655 }
656 
657 
658 /**
659   Make list of leaf tables of join table tree
660 
661   @param list    pointer to pointer on list first element
662   @param tables  table list
663 
664   @returns pointer on pointer to next_leaf of last element
665 */
666 
make_leaf_tables(TABLE_LIST ** list,TABLE_LIST * tables)667 static TABLE_LIST **make_leaf_tables(TABLE_LIST **list, TABLE_LIST *tables)
668 {
669   for (TABLE_LIST *table= tables; table; table= table->next_local)
670   {
671     // A mergable view is not allowed to have a table pointer.
672     assert(!(table->is_view() && table->is_merged() && table->table));
673     if (table->merge_underlying_list)
674     {
675       assert(table->is_merged());
676 
677       list= make_leaf_tables(list, table->merge_underlying_list);
678     }
679     else
680     {
681       *list= table;
682       list= &table->next_leaf;
683     }
684   }
685   return list;
686 }
687 
688 
689 /**
690   Check privileges for the view tables merged into a query block.
691 
692   @param thd                   Thread context.
693   @param want_privilege_first  Privileges requested for the first leaf.
694   @param want_privilege_next   Privileges requested for the remaining leaves.
695 
696   @note Beware that it can't properly check privileges in cases when
697         table being changed is not the first table in the list of leaf
698         tables (for example, for multi-UPDATE).
699 
700   @note The inner loop is slightly inefficient. A view will have its privileges
701         checked once for every base table that it refers to.
702 
703   @returns false if success, true if error.
704 */
705 
check_view_privileges(THD * thd,ulong want_privilege_first,ulong want_privilege_next)706 bool st_select_lex::check_view_privileges(THD *thd,
707                                           ulong want_privilege_first,
708                                           ulong want_privilege_next)
709 {
710   ulong want_privilege= want_privilege_first;
711   Internal_error_handler_holder<View_error_handler, TABLE_LIST>
712     view_handler(thd, true, leaf_tables);
713 
714   for (TABLE_LIST *tl= leaf_tables; tl; tl= tl->next_leaf)
715   {
716     for (TABLE_LIST *ref= tl; ref->referencing_view; ref= ref->referencing_view)
717     {
718       if (check_single_table_access(thd, want_privilege, ref, false))
719         return true;
720     }
721     want_privilege= want_privilege_next;
722   }
723   return false;
724 }
725 
726 
727 /**
728   Set up table leaves in the query block based on list of tables.
729 
730   @param thd           Thread handler
731   @param tables        List of tables to handle
732   @param select_insert It is SELECT ... INSERT command
733 
734   @note
735     Check also that the 'used keys' and 'ignored keys' exists and set up the
736     table structure accordingly.
737     Create a list of leaf tables.
738 
739     This function has to be called for all tables that are used by items,
740     as otherwise table->map is not set and all Item_field will be regarded
741     as const items.
742 
743   @returns False on success, true on error
744 */
745 
setup_tables(THD * thd,TABLE_LIST * tables,bool select_insert)746 bool st_select_lex::setup_tables(THD *thd, TABLE_LIST *tables,
747                                  bool select_insert)
748 {
749   DBUG_ENTER("st_select_lex::setup_tables");
750 
751   assert ((select_insert && !tables->next_name_resolution_table) ||
752           !tables ||
753           (context.table_list && context.first_name_resolution_table));
754 
755   make_leaf_tables(&leaf_tables, tables);
756 
757   TABLE_LIST *first_select_table= NULL;
758   if (select_insert)
759   {
760     // "insert_table" is needed for remap_tables().
761     thd->lex->insert_table= leaf_tables->top_table();
762 
763     // Get first table in SELECT part
764     first_select_table= thd->lex->insert_table->next_local;
765 
766     // Then, find the first leaf table
767     if (first_select_table)
768       first_select_table= first_select_table->first_leaf_table();
769   }
770   uint tableno= 0;
771   leaf_table_count= 0;
772   partitioned_table_count= 0;
773 
774   for (TABLE_LIST *tr= leaf_tables; tr; tr= tr->next_leaf, tableno++)
775   {
776     TABLE *const table= tr->table;
777     if (tr == first_select_table)
778     {
779       /*
780         For INSERT ... SELECT command, restart numbering from zero for first
781         leaf table from SELECT part of query.
782       */
783       first_select_table= 0;
784       tableno= 0;
785     }
786     if (tableno >= MAX_TABLES)
787     {
788       my_error(ER_TOO_MANY_TABLES,MYF(0), static_cast<int>(MAX_TABLES));
789       DBUG_RETURN(true);
790     }
791     tr->set_tableno(tableno);
792     leaf_table_count++;       // Count the input tables of the query
793     if (table == NULL)
794       continue;
795     table->pos_in_table_list= tr;
796     tr->reset();
797 
798     /*
799       Only set hints on first execution.  Otherwise, hints will refer to
800       wrong query block after semijoin transformation
801     */
802     if (first_execution &&
803         opt_hints_qb &&                      // QB hints initialized
804         !tr->opt_hints_table)                // Table hints are not adjusted yet
805     {
806       tr->opt_hints_table= opt_hints_qb->adjust_table_hints(table, tr->alias);
807     }
808 
809     if (tr->process_index_hints(table))
810       DBUG_RETURN(true);
811     if (table->part_info)     // Count number of partitioned tables
812       partitioned_table_count++;
813   }
814 
815   if (opt_hints_qb)
816     opt_hints_qb->check_unresolved(thd);
817 
818   DBUG_RETURN(false);
819 }
820 
821 
822 /**
823   Re-map table numbers for all tables in a query block.
824 
825   @param thd           Thread handler
826 
827   @note
828     This function needs to be called after setup_tables() has been called,
829     and after a query block for a subquery has been merged into a parent
830     quary block.
831 */
832 
remap_tables(THD * thd)833 void st_select_lex::remap_tables(THD *thd)
834 {
835   LEX *const lex= thd->lex;
836   TABLE_LIST *first_select_table= NULL;
837   if (lex->insert_table &&
838       lex->insert_table == leaf_tables->top_table())
839   {
840     /*
841       For INSERT ... SELECT command, restart numbering from zero for first
842       leaf table from SELECT part of query.
843     */
844     // Get first table in SELECT part
845     first_select_table= lex->insert_table->next_local;
846 
847     // Then, recurse down to get first leaf table
848     if (first_select_table)
849       first_select_table= first_select_table->first_leaf_table();
850   }
851 
852   uint tableno= 0;
853   for (TABLE_LIST *tl= leaf_tables; tl; tl= tl->next_leaf)
854   {
855     // Reset table number after having reached first table after insert table
856     if (first_select_table == tl)
857       tableno= 0;
858     tl->set_tableno(tableno++);
859   }
860 }
861 
862 /**
863   @brief Resolve derived table and view references in query block
864 
865   @param thd            Pointer to THD.
866   @param apply_semijoin if true, apply semi-join transform when possible
867 
868   @return false if success, true if error
869 */
870 
resolve_derived(THD * thd,bool apply_semijoin)871 bool st_select_lex::resolve_derived(THD *thd, bool apply_semijoin)
872 {
873   DBUG_ENTER("st_select_lex::resolve_derived");
874 
875   assert(derived_table_count);
876 
877   // Prepare derived tables and views that belong to this query block.
878   for (TABLE_LIST *tl= get_table_list(); tl; tl= tl->next_local)
879   {
880     if (!tl->is_view_or_derived() || tl->is_merged())
881       continue;
882     if (tl->resolve_derived(thd, apply_semijoin))
883       DBUG_RETURN(true);
884   }
885 
886   /*
887     Merge the derived tables that do not require materialization into
888     the current query block, if possible.
889     Merging is only done once and must not be repeated for prepared execs.
890   */
891   if (!(thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW) &&
892       first_execution)
893   {
894     for (TABLE_LIST *tl= get_table_list(); tl; tl= tl->next_local)
895     {
896       if (!tl->is_view_or_derived() ||
897           tl->is_merged() ||
898           !tl->is_mergeable())
899       continue;
900       if (merge_derived(thd, tl))
901         DBUG_RETURN(true);        /* purecov: inspected */
902     }
903   }
904 
905   // Prepare remaining derived tables for materialization
906   for (TABLE_LIST *tl= get_table_list(); tl; tl= tl->next_local)
907   {
908     // Ensure that any derived table is merged or materialized after prepare:
909     assert(first_execution || !tl->is_view_or_derived() ||
910            tl->is_merged() || tl->uses_materialization());
911     if (!tl->is_view_or_derived() || tl->is_merged())
912       continue;
913     if (tl->setup_materialized_derived(thd))
914       DBUG_RETURN(true);
915     materialized_derived_table_count++;
916   }
917 
918   /*
919     The loops above will not reach derived tables that are contained within
920     other derived tables that have been merged into the enclosing query block.
921     To reach them, traverse the list of leaf tables and resolve and
922     setup for materialization those derived tables that have no TABLE
923     object (they have not been set up yet).
924   */
925   if (!first_execution)
926   {
927     for (TABLE_LIST *tl= leaf_tables; tl; tl= tl->next_leaf)
928     {
929       if (!tl->is_view_or_derived() || tl->table != NULL)
930         continue;
931       assert(!tl->is_merged());
932       if (tl->resolve_derived(thd, apply_semijoin))
933         DBUG_RETURN(true);        /* purecov: inspected */
934       if (tl->setup_materialized_derived(thd))
935         DBUG_RETURN(true);        /* purecov: inspected */
936       /*
937         materialized_derived_table_count was incremented during preparation,
938         so do not do it once more.
939       */
940     }
941   }
942 
943   DBUG_RETURN(false);
944 }
945 
946 
947 /**
948   @brief Resolve predicate involving subquery
949 
950   @param thd     Pointer to THD.
951 
952   @retval FALSE  Success.
953   @retval TRUE   Error.
954 
955   @details
956   Perform early unconditional subquery transformations:
957    - Convert subquery predicate into semi-join, or
958    - Mark the subquery for execution using materialization, or
959    - Perform IN->EXISTS transformation, or
960    - Perform more/less ALL/ANY -> MIN/MAX rewrite
961    - Substitute trivial scalar-context subquery with its value
962 
963   @todo for PS, make the whole block execute only on the first execution
964 
965 */
966 
resolve_subquery(THD * thd)967 bool SELECT_LEX::resolve_subquery(THD *thd)
968 {
969   DBUG_ENTER("resolve_subquery");
970 
971   bool chose_semijoin= false;
972   SELECT_LEX *const outer= outer_select();
973 
974   /*
975     @todo for PS, make the whole block execute only on the first execution.
976     resolve_subquery() is only invoked in the first execution for subqueries
977     that are transformed to semijoin, but for other subqueries, this function
978     is called for every execution. One solution is perhaps to define
979     exec_method in class Item_subselect and exit immediately if unequal to
980     EXEC_UNSPECIFIED.
981   */
982   Item_subselect *subq_predicate= master_unit()->item;
983   assert(subq_predicate);
984   /**
985     @note
986     In this case: IN (SELECT ... UNION SELECT ...), SELECT_LEX::prepare() is
987     called for each of the two UNION members, and in those two calls,
988     subq_predicate is the same, not sure this is desired (double work?).
989   */
990 
991   Item_in_subselect * const in_predicate=
992     (subq_predicate->substype() == Item_subselect::IN_SUBS) ?
993     static_cast<Item_in_subselect *>(subq_predicate) : NULL;
994 
995   if (in_predicate)
996   {
997     thd->lex->set_current_select(outer);
998     char const *save_where= thd->where;
999     thd->where= "IN/ALL/ANY subquery";
1000     Disable_semijoin_flattening DSF(outer, true);
1001 
1002     bool result= !in_predicate->left_expr->fixed &&
1003                   in_predicate->left_expr->fix_fields(thd,
1004                                                      &in_predicate->left_expr);
1005     thd->lex->set_current_select(this);
1006     thd->where= save_where;
1007     if (result)
1008       DBUG_RETURN(true);
1009 
1010     /*
1011       Check if the left and right expressions have the same # of
1012       columns, i.e. we don't have a case like
1013         (oe1, oe2) IN (SELECT ie1, ie2, ie3 ...)
1014 
1015       TODO why do we have this duplicated in IN->EXISTS transformers?
1016       psergey-todo: fix these: grep for duplicated_subselect_card_check
1017     */
1018     if (item_list.elements != in_predicate->left_expr->cols())
1019     {
1020       my_error(ER_OPERAND_COLUMNS, MYF(0), in_predicate->left_expr->cols());
1021       DBUG_RETURN(true);
1022     }
1023   }
1024 
1025   DBUG_PRINT("info", ("Checking if subq can be converted to semi-join"));
1026   /*
1027     Check if we're in subquery that is a candidate for flattening into a
1028     semi-join (which is done in flatten_subqueries()). The requirements are:
1029       1. Subquery predicate is an IN/=ANY subquery predicate
1030       2. Subquery is a single SELECT (not a UNION)
1031       3. Subquery does not have GROUP BY
1032       4. Subquery does not use aggregate functions or HAVING
1033       5. Subquery predicate is (a) in an ON/WHERE clause, and (b) at
1034       the AND-top-level of that clause.
1035       6. Parent query block accepts semijoins (i.e we are not in a subquery of
1036       a single table UPDATE/DELETE (TODO: We should handle this at some
1037       point by switching to multi-table UPDATE/DELETE)
1038       7. We're not in a confluent table-less subquery, like "SELECT 1".
1039       8. No execution method was already chosen (by a prepared statement).
1040       9. Parent query block is not a confluent table-less query block.
1041       10. Neither parent nor child query block has straight join.
1042       11. Parent query block does not prohibit semi-join.
1043   */
1044   if (semijoin_enabled(thd) &&
1045       in_predicate &&                                                   // 1
1046       !is_part_of_union() &&                                            // 2
1047       !group_list.elements &&                                           // 3
1048       !m_having_cond && !with_sum_func &&                               // 4
1049       (outer->resolve_place == st_select_lex::RESOLVE_CONDITION ||      // 5a
1050        outer->resolve_place == st_select_lex::RESOLVE_JOIN_NEST) &&     // 5a
1051       !outer->semijoin_disallowed &&                                    // 5b
1052       outer->sj_candidates &&                                           // 6
1053       leaf_table_count > 0 &&                                           // 7
1054       in_predicate->exec_method ==
1055                            Item_exists_subselect::EXEC_UNSPECIFIED &&   // 8
1056       outer->leaf_table_count &&                                        // 9
1057       !((active_options() | outer->active_options()) &
1058         SELECT_STRAIGHT_JOIN) &&                                        //10
1059       !(outer->active_options() & SELECT_NO_SEMI_JOIN))                 //11
1060   {
1061     DBUG_PRINT("info", ("Subquery is semi-join conversion candidate"));
1062 
1063     /* Notify in the subquery predicate where it belongs in the query graph */
1064     in_predicate->embedding_join_nest= outer->resolve_nest;
1065 
1066     /* Register the subquery for further processing in flatten_subqueries() */
1067     outer->sj_candidates->push_back(in_predicate);
1068     chose_semijoin= true;
1069   }
1070 
1071   if (in_predicate)
1072   {
1073     Opt_trace_context * const trace= &thd->opt_trace;
1074     OPT_TRACE_TRANSFORM(trace, oto0, oto1,
1075                         select_number, "IN (SELECT)", "semijoin");
1076     oto1.add("chosen", chose_semijoin);
1077   }
1078 
1079   if (!chose_semijoin &&
1080       subq_predicate->select_transformer(this) ==
1081       Item_subselect::RES_ERROR)
1082     DBUG_RETURN(true);
1083 
1084   DBUG_RETURN(false);
1085 }
1086 
1087 
1088 /**
1089   Expand all '*' in list of expressions with the matching column references
1090 
1091   Function should not be called with no wild cards in select list
1092 
1093   @param  thd     thread handler
1094 
1095   @returns false if OK, true if error
1096 */
1097 
setup_wild(THD * thd)1098 bool SELECT_LEX::setup_wild(THD *thd)
1099 {
1100   DBUG_ENTER("SELECT_LEX::setup_wild");
1101 
1102   assert(with_wild);
1103 
1104   // PS/SP uses arena so that changes are made permanently.
1105   Prepared_stmt_arena_holder ps_arena_holder(thd);
1106 
1107   Item *item;
1108   List_iterator<Item> it(fields_list);
1109 
1110   while (with_wild && (item= it++))
1111   {
1112     Item_field *item_field;
1113     if (item->type() == Item::FIELD_ITEM &&
1114         (item_field= down_cast<Item_field *>(item)) &&
1115         item_field->is_asterisk())
1116     {
1117       assert(item_field->field == NULL);
1118       const uint elem= fields_list.elements;
1119       const bool any_privileges= item_field->any_privileges;
1120       Item_subselect *subsel= master_unit()->item;
1121 
1122       /*
1123         In case of EXISTS(SELECT * ... HAVING ...), don't use this
1124         transformation. The columns in HAVING will need to resolve to the
1125         select list. Replacing * with 1 effectively eliminates this
1126         possibility.
1127       */
1128       if (subsel && subsel->substype() == Item_subselect::EXISTS_SUBS &&
1129           !having_cond())
1130       {
1131         /*
1132           It is EXISTS(SELECT * ...) and we can replace * by any constant.
1133 
1134           Item_int do not need fix_fields() because it is basic constant.
1135         */
1136         it.replace(new Item_int(NAME_STRING("Not_used"), (longlong) 1,
1137                                 MY_INT64_NUM_DECIMAL_DIGITS));
1138       }
1139       else
1140       {
1141         if (insert_fields(thd, item_field->context,
1142                           item_field->db_name, item_field->table_name,
1143                           &it, any_privileges))
1144           DBUG_RETURN(true);
1145       }
1146       /*
1147         all_fields is a list that has the fields list as a tail.
1148         Because of this we have to update the element count also for this
1149         list after expanding the '*' entry.
1150       */
1151       all_fields.elements+= fields_list.elements - elem;
1152 
1153       with_wild--;
1154     }
1155   }
1156 
1157   DBUG_RETURN(false);
1158 }
1159 
1160 /**
1161   Resolve WHERE condition and join conditions
1162 
1163   @param  thd     thread handler
1164 
1165   @returns false if success, true if error
1166 */
setup_conds(THD * thd)1167 bool SELECT_LEX::setup_conds(THD *thd)
1168 {
1169   DBUG_ENTER("SELECT_LEX::setup_conds");
1170 
1171   /*
1172     it_is_update set to TRUE when tables of primary SELECT_LEX (SELECT_LEX
1173     which belong to LEX, i.e. most up SELECT) will be updated by
1174     INSERT/UPDATE/LOAD
1175     NOTE: using this condition helps to prevent call of prepare_check_option()
1176     from subquery of VIEW, because tables of subquery belongs to VIEW
1177     (see condition before prepare_check_option() call)
1178   */
1179   const bool it_is_update= (this == thd->lex->select_lex) &&
1180                            thd->lex->which_check_option_applicable();
1181   const bool save_is_item_list_lookup= is_item_list_lookup;
1182   is_item_list_lookup= false;
1183 
1184   DBUG_PRINT("info", ("thd->mark_used_columns: %d", thd->mark_used_columns));
1185 
1186   if (m_where_cond)
1187   {
1188     resolve_place= st_select_lex::RESOLVE_CONDITION;
1189     thd->where="where clause";
1190     if ((!m_where_cond->fixed &&
1191          m_where_cond->fix_fields(thd, &m_where_cond)) ||
1192 	m_where_cond->check_cols(1))
1193       DBUG_RETURN(true);
1194     resolve_place= st_select_lex::RESOLVE_NONE;
1195   }
1196 
1197   /*
1198     Apply fix_fields() to all ON clauses at all levels of nesting,
1199     including the ones inside view definitions.
1200   */
1201   for (TABLE_LIST *table= leaf_tables; table; table= table->next_leaf)
1202   {
1203     TABLE_LIST *embedded; /* The table at the current level of nesting. */
1204     TABLE_LIST *embedding= table; /* The parent nested table reference. */
1205     do
1206     {
1207       embedded= embedding;
1208       if (embedded->join_cond())
1209       {
1210         resolve_place= st_select_lex::RESOLVE_JOIN_NEST;
1211         resolve_nest= embedded;
1212         thd->where="on clause";
1213         if ((!embedded->join_cond()->fixed &&
1214            embedded->join_cond()->fix_fields(thd, embedded->join_cond_ref())) ||
1215 	   embedded->join_cond()->check_cols(1))
1216           DBUG_RETURN(true);
1217         cond_count++;
1218         resolve_place= st_select_lex::RESOLVE_NONE;
1219         resolve_nest= NULL;
1220       }
1221       embedding= embedded->embedding;
1222     }
1223     while (embedding &&
1224            embedding->nested_join->join_list.head() == embedded);
1225 
1226     /* process CHECK OPTION */
1227     if (it_is_update)
1228     {
1229       TABLE_LIST *view= table->top_table();
1230       if (view->is_view() && view->is_merged())
1231       {
1232         if (view->prepare_check_option(thd))
1233           DBUG_RETURN(true);        /* purecov: inspected */
1234         table->check_option= view->check_option;
1235       }
1236     }
1237   }
1238 
1239   is_item_list_lookup= save_is_item_list_lookup;
1240 
1241   assert(thd->lex->current_select() == this);
1242   assert(!thd->is_error());
1243   DBUG_RETURN(false);
1244 }
1245 
1246 
1247 /**
1248   Set NESTED_JOIN::counter=0 in all nested joins in passed list.
1249 
1250   @param join_list  Pass NULL. Non-NULL is reserved for recursive inner calls,
1251   then it is a list of nested joins to process, and may also contain base
1252   tables which will be ignored.
1253 */
1254 
reset_nj_counters(List<TABLE_LIST> * join_list)1255 void SELECT_LEX::reset_nj_counters(List<TABLE_LIST> *join_list)
1256 {
1257   if (join_list == NULL)
1258     join_list= &top_join_list;
1259   List_iterator<TABLE_LIST> li(*join_list);
1260   TABLE_LIST *table;
1261   DBUG_ENTER("reset_nj_counters");
1262   while ((table= li++))
1263   {
1264     NESTED_JOIN *nested_join;
1265     if ((nested_join= table->nested_join))
1266     {
1267       nested_join->nj_counter= 0;
1268       reset_nj_counters(&nested_join->join_list);
1269     }
1270   }
1271   DBUG_VOID_RETURN;
1272 }
1273 
1274 
1275 /**
1276   Simplify joins replacing outer joins by inner joins whenever it's
1277   possible.
1278 
1279     The function, during a retrieval of join_list,  eliminates those
1280     outer joins that can be converted into inner join, possibly nested.
1281     It also moves the join conditions for the converted outer joins
1282     and from inner joins to conds.
1283     The function also calculates some attributes for nested joins:
1284 
1285     -# used_tables
1286     -# not_null_tables
1287     -# dep_tables.
1288     -# on_expr_dep_tables
1289 
1290     The first two attributes are used to test whether an outer join can
1291     be substituted by an inner join. The third attribute represents the
1292     relation 'to be dependent on' for tables. If table t2 is dependent
1293     on table t1, then in any evaluated execution plan table access to
1294     table t2 must precede access to table t2. This relation is used also
1295     to check whether the query contains  invalid cross-references.
1296     The fourth attribute is an auxiliary one and is used to calculate
1297     dep_tables.
1298     As the attribute dep_tables qualifies possibles orders of tables in the
1299     execution plan, the dependencies required by the straight join
1300     modifiers are reflected in this attribute as well.
1301     The function also removes all parentheses that can be removed from the join
1302     expression without changing its meaning.
1303 
1304   @note
1305     An outer join can be replaced by an inner join if the where condition
1306     or the join condition for an embedding nested join contains a conjunctive
1307     predicate rejecting null values for some attribute of the inner tables.
1308 
1309     E.g. in the query:
1310     @code
1311       SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a WHERE t2.b < 5
1312     @endcode
1313     the predicate t2.b < 5 rejects nulls.
1314     The query is converted first to:
1315     @code
1316       SELECT * FROM t1 INNER JOIN t2 ON t2.a=t1.a WHERE t2.b < 5
1317     @endcode
1318     then to the equivalent form:
1319     @code
1320       SELECT * FROM t1, t2 ON t2.a=t1.a WHERE t2.b < 5 AND t2.a=t1.a
1321     @endcode
1322 
1323 
1324     Similarly the following query:
1325     @code
1326       SELECT * from t1 LEFT JOIN (t2, t3) ON t2.a=t1.a t3.b=t1.b
1327         WHERE t2.c < 5
1328     @endcode
1329     is converted to:
1330     @code
1331       SELECT * FROM t1, (t2, t3) WHERE t2.c < 5 AND t2.a=t1.a t3.b=t1.b
1332 
1333     @endcode
1334 
1335     One conversion might trigger another:
1336     @code
1337       SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a
1338                        LEFT JOIN t3 ON t3.b=t2.b
1339         WHERE t3 IS NOT NULL =>
1340       SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a, t3
1341         WHERE t3 IS NOT NULL AND t3.b=t2.b =>
1342       SELECT * FROM t1, t2, t3
1343         WHERE t3 IS NOT NULL AND t3.b=t2.b AND t2.a=t1.a
1344   @endcode
1345 
1346     The function removes all unnecessary parentheses from the expression
1347     produced by the conversions.
1348     E.g.
1349     @code
1350       SELECT * FROM t1, (t2, t3) WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b
1351     @endcode
1352     finally is converted to:
1353     @code
1354       SELECT * FROM t1, t2, t3 WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b
1355 
1356     @endcode
1357 
1358 
1359     It also will remove parentheses from the following queries:
1360     @code
1361       SELECT * from (t1 LEFT JOIN t2 ON t2.a=t1.a) LEFT JOIN t3 ON t3.b=t2.b
1362       SELECT * from (t1, (t2,t3)) WHERE t1.a=t2.a AND t2.b=t3.b.
1363     @endcode
1364 
1365     The benefit of this simplification procedure is that it might return
1366     a query for which the optimizer can evaluate execution plans with more
1367     join orders. With a left join operation the optimizer does not
1368     consider any plan where one of the inner tables is before some of outer
1369     tables.
1370 
1371   IMPLEMENTATION
1372     The function is implemented by a recursive procedure.  On the recursive
1373     ascent all attributes are calculated, all outer joins that can be
1374     converted are replaced and then all unnecessary parentheses are removed.
1375     As join list contains join tables in the reverse order sequential
1376     elimination of outer joins does not require extra recursive calls.
1377 
1378   SEMI-JOIN NOTES
1379     Remove all semi-joins that have are within another semi-join (i.e. have
1380     an "ancestor" semi-join nest)
1381 
1382   EXAMPLES
1383     Here is an example of a join query with invalid cross references:
1384     @code
1385       SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t3.a LEFT JOIN t3 ON t3.b=t1.b
1386     @endcode
1387 
1388   @param thd         thread handler
1389   @param join_list   list representation of the join to be converted
1390   @param top         true <=> cond is the where condition
1391   @param in_sj       TRUE <=> processing semi-join nest's children
1392   @param[in,out] cond In: condition to which the join condition for converted
1393                           outer joins is to be added;
1394                       Out: new condition
1395   @param changelog   Don't specify this parameter, it is reserved for
1396                      recursive calls inside this function
1397 
1398   @returns true for error, false for success
1399 */
1400 bool
simplify_joins(THD * thd,List<TABLE_LIST> * join_list,bool top,bool in_sj,Item ** cond,uint * changelog)1401 SELECT_LEX::simplify_joins(THD *thd,
1402                            List<TABLE_LIST> *join_list, bool top,
1403                            bool in_sj, Item **cond, uint *changelog)
1404 {
1405   /*
1406     Each type of change done by this function, or its recursive calls, is
1407     tracked in a bitmap:
1408   */
1409   enum change
1410   {
1411     NONE= 0,
1412     OUTER_JOIN_TO_INNER= 1 << 0,
1413     JOIN_COND_TO_WHERE= 1 << 1,
1414     PAREN_REMOVAL= 1 << 2,
1415     SEMIJOIN= 1 << 3
1416   };
1417   uint changes= 0; // To keep track of changes.
1418   if (changelog == NULL) // This is the top call.
1419     changelog= &changes;
1420 
1421   TABLE_LIST *table;
1422   NESTED_JOIN *nested_join;
1423   TABLE_LIST *prev_table= 0;
1424   List_iterator<TABLE_LIST> li(*join_list);
1425   const bool straight_join= active_options() & SELECT_STRAIGHT_JOIN;
1426   DBUG_ENTER("simplify_joins");
1427 
1428   /*
1429     Try to simplify join operations from join_list.
1430     The most outer join operation is checked for conversion first.
1431   */
1432   while ((table= li++))
1433   {
1434     table_map used_tables;
1435     table_map not_null_tables= (table_map) 0;
1436 
1437     if ((nested_join= table->nested_join))
1438     {
1439       /*
1440          If the element of join_list is a nested join apply
1441          the procedure to its nested join list first.
1442       */
1443       if (table->join_cond())
1444       {
1445         Item *join_cond= table->join_cond();
1446         /*
1447            If a join condition JC is attached to the table,
1448            check all null rejected predicates in this condition.
1449            If such a predicate over an attribute belonging to
1450            an inner table of an embedded outer join is found,
1451            the outer join is converted to an inner join and
1452            the corresponding join condition is added to JC.
1453 	*/
1454         if (simplify_joins(thd, &nested_join->join_list,
1455                            false, in_sj || table->sj_cond(),
1456                            &join_cond, changelog))
1457           DBUG_RETURN(true);
1458 
1459         if (join_cond != table->join_cond())
1460         {
1461           assert(join_cond);
1462           table->set_join_cond(join_cond);
1463         }
1464       }
1465       nested_join->used_tables= (table_map) 0;
1466       nested_join->not_null_tables=(table_map) 0;
1467       if (simplify_joins(thd, &nested_join->join_list, top,
1468                          in_sj || table->sj_cond(), cond, changelog))
1469         DBUG_RETURN(true);
1470       used_tables= nested_join->used_tables;
1471       not_null_tables= nested_join->not_null_tables;
1472     }
1473     else
1474     {
1475       used_tables= table->map();
1476       if (*cond)
1477         not_null_tables= (*cond)->not_null_tables();
1478     }
1479 
1480     if (table->embedding)
1481     {
1482       table->embedding->nested_join->used_tables|= used_tables;
1483       table->embedding->nested_join->not_null_tables|= not_null_tables;
1484     }
1485 
1486     if (!table->outer_join || (used_tables & not_null_tables))
1487     {
1488       /*
1489         For some of the inner tables there are conjunctive predicates
1490         that reject nulls => the outer join can be replaced by an inner join.
1491       */
1492       if (table->outer_join)
1493       {
1494         *changelog|= OUTER_JOIN_TO_INNER;
1495         table->outer_join= 0;
1496       }
1497       if (table->join_cond())
1498       {
1499         *changelog|= JOIN_COND_TO_WHERE;
1500         /* Add join condition to the WHERE or upper-level join condition. */
1501         if (*cond)
1502         {
1503           Item_cond_and *new_cond=
1504             static_cast<Item_cond_and*>(and_conds(*cond, table->join_cond()));
1505           if (!new_cond)
1506             DBUG_RETURN(true);
1507           new_cond->top_level_item();
1508           /*
1509             It is always a new item as both the upper-level condition and a
1510             join condition existed
1511           */
1512           assert(!new_cond->fixed);
1513           if (new_cond->fix_fields(thd, NULL))
1514             DBUG_RETURN(true);
1515 
1516           /* If join condition has a pending rollback in THD::change_list */
1517           List_iterator<Item> lit(*new_cond->argument_list());
1518           Item *arg;
1519           while ((arg= lit++))
1520           {
1521             /*
1522               Check whether the arguments to AND need substitution
1523               of rollback location.
1524             */
1525             thd->replace_rollback_place(lit.ref());
1526           }
1527           *cond= new_cond;
1528         }
1529         else
1530         {
1531           *cond= table->join_cond();
1532           /* If join condition has a pending rollback in THD::change_list */
1533           thd->replace_rollback_place(cond);
1534         }
1535         table->set_join_cond(NULL);
1536       }
1537     }
1538 
1539     if (!top)
1540       continue;
1541 
1542     /*
1543       Only inner tables of non-convertible outer joins remain with
1544       the join condition.
1545     */
1546     if (table->join_cond())
1547     {
1548       table->dep_tables|= table->join_cond()->used_tables();
1549 
1550       // At this point the joined tables always have an embedding join nest:
1551       assert(table->embedding);
1552 
1553       table->dep_tables&= ~table->embedding->nested_join->used_tables;
1554 
1555       // Embedding table depends on tables used in embedded join conditions.
1556       table->embedding->on_expr_dep_tables|= table->join_cond()->used_tables();
1557     }
1558 
1559     if (prev_table)
1560     {
1561       /* The order of tables is reverse: prev_table follows table */
1562       if (prev_table->straight || straight_join)
1563         prev_table->dep_tables|= used_tables;
1564       if (prev_table->join_cond())
1565       {
1566         prev_table->dep_tables|= table->on_expr_dep_tables;
1567         table_map prev_used_tables= prev_table->nested_join ?
1568 	                            prev_table->nested_join->used_tables :
1569 	                            prev_table->map();
1570         /*
1571           If join condition contains only references to inner tables
1572           we still make the inner tables dependent on the outer tables.
1573           It would be enough to set dependency only on one outer table
1574           for them. Yet this is really a rare case.
1575           Note:
1576           PSEUDO_TABLE_BITS mask should not be counted as it
1577           prevents update of inner table dependencies.
1578           For example it might happen if RAND()/COUNT(*) function
1579           is used in JOIN ON clause.
1580 	*/
1581         if (!((prev_table->join_cond()->used_tables() & ~PSEUDO_TABLE_BITS) &
1582               ~prev_used_tables))
1583           prev_table->dep_tables|= used_tables;
1584       }
1585     }
1586     prev_table= table;
1587   }
1588 
1589   /*
1590     Flatten nested joins that can be flattened.
1591     no join condition and not a semi-join => can be flattened.
1592   */
1593   li.rewind();
1594   while ((table= li++))
1595   {
1596     nested_join= table->nested_join;
1597     if (table->sj_cond() && !in_sj)
1598     {
1599        /*
1600          If this is a semi-join that is not contained within another semi-join,
1601          leave it intact (otherwise it is flattened)
1602        */
1603       *changelog|= SEMIJOIN;
1604     }
1605     else if (nested_join && !table->join_cond())
1606     {
1607       *changelog|= PAREN_REMOVAL;
1608       TABLE_LIST *tbl;
1609       List_iterator<TABLE_LIST> it(nested_join->join_list);
1610       while ((tbl= it++))
1611       {
1612         tbl->embedding= table->embedding;
1613         tbl->join_list= table->join_list;
1614         tbl->dep_tables|= table->dep_tables;
1615       }
1616       li.replace(nested_join->join_list);
1617     }
1618   }
1619 
1620   if (changes)
1621   {
1622     Opt_trace_context * trace= &thd->opt_trace;
1623     if (unlikely(trace->is_started()))
1624     {
1625       Opt_trace_object trace_wrapper(trace);
1626       Opt_trace_object trace_object(trace, "transformations_to_nested_joins");
1627       {
1628         Opt_trace_array trace_changes(trace, "transformations");
1629         if (changes & SEMIJOIN)
1630           trace_changes.add_alnum("semijoin");
1631         if (changes & OUTER_JOIN_TO_INNER)
1632           trace_changes.add_alnum("outer_join_to_inner_join");
1633         if (changes & JOIN_COND_TO_WHERE)
1634           trace_changes.add_alnum("JOIN_condition_to_WHERE");
1635         if (changes & PAREN_REMOVAL)
1636           trace_changes.add_alnum("parenthesis_removal");
1637       }
1638       // the newly transformed query is worth printing
1639       opt_trace_print_expanded_query(thd, this, &trace_object);
1640     }
1641   }
1642   DBUG_RETURN(false);
1643 }
1644 
1645 
1646 /**
1647   Record join nest info in the select block.
1648 
1649   After simplification of inner join, outer join and semi-join structures:
1650    - record the remaining semi-join structures in the enclosing query block.
1651    - record transformed join conditions in TABLE_LIST objects.
1652 
1653   This function is called recursively for each join nest and/or table
1654   in the query block.
1655 
1656   @param select The query block
1657   @param tables List of tables and join nests
1658 
1659   @return False if successful, True if failure
1660 */
record_join_nest_info(List<TABLE_LIST> * tables)1661 bool SELECT_LEX::record_join_nest_info(List<TABLE_LIST> *tables)
1662 {
1663   TABLE_LIST *table;
1664   List_iterator<TABLE_LIST> li(*tables);
1665   DBUG_ENTER("record_join_nest_info");
1666 
1667   while ((table= li++))
1668   {
1669     if (table->nested_join == NULL)
1670     {
1671       if (table->join_cond())
1672         outer_join|= table->map();
1673       continue;
1674     }
1675 
1676     if (record_join_nest_info(&table->nested_join->join_list))
1677       DBUG_RETURN(true);
1678     /*
1679       sj_inner_tables is set properly later in pull_out_semijoin_tables().
1680       This assignment is required in case pull_out_semijoin_tables()
1681       is not called.
1682     */
1683     if (table->sj_cond())
1684       table->sj_inner_tables= table->nested_join->used_tables;
1685 
1686     if (table->sj_cond() && sj_nests.push_back(table))
1687       DBUG_RETURN(true);
1688 
1689     if (table->join_cond())
1690       outer_join|= table->nested_join->used_tables;
1691   }
1692   DBUG_RETURN(false);
1693 }
1694 
1695 
subq_sj_candidate_cmp(Item_exists_subselect * const * el1,Item_exists_subselect * const * el2)1696 static int subq_sj_candidate_cmp(Item_exists_subselect* const *el1,
1697                                  Item_exists_subselect* const *el2)
1698 {
1699   /*
1700     Remove this assert when we support semijoin on non-IN subqueries.
1701   */
1702   assert((*el1)->substype() == Item_subselect::IN_SUBS &&
1703          (*el2)->substype() == Item_subselect::IN_SUBS);
1704   return ((*el1)->sj_convert_priority < (*el2)->sj_convert_priority) ? 1 :
1705          ( ((*el1)->sj_convert_priority == (*el2)->sj_convert_priority)? 0 : -1);
1706 }
1707 
1708 
1709 /**
1710   Update table reference information for conditions and expressions due to
1711   query blocks having been merged in from derived tables/views and due to
1712   semi-join transformation.
1713 
1714   This is needed for two reasons:
1715 
1716   1. Since table numbers are changed, we need to update used_tables
1717      information for all conditions and expressions that are possibly touched.
1718 
1719   2. For semi-join, some column references are changed from outer references
1720      to local references.
1721 
1722   The function needs to recursively walk down into join nests,
1723   in order to cover all conditions and expressions.
1724 
1725   For a semi-join, tables from the subquery are added last in the query block.
1726   This means that conditions and expressions from the outer query block
1727   are unaffected. But all conditions inside the semi-join nest, including
1728   join conditions, must have their table numbers changed.
1729 
1730   For a derived table/view, tables from the subquery are merged into the
1731   outer query, and this function is called for every derived table that is
1732   merged in. This algorithm only works when derived tables are merged in
1733   the order of their original table numbers.
1734 
1735   A hypothetical example with a triple self-join over a mergeable view:
1736 
1737     CREATE VIEW v AS SELECT t1.a, t2.b FROM t1 JOIN t2 USING (a);
1738     SELECT v1.a, v1.b, v2.b, v3.b
1739     FROM v AS v1 JOIN v AS v2 ON ... JOIN v AS v3 ON ...;
1740 
1741   The analysis starts with three tables v1, v2 and v3 having numbers 0, 1, 2.
1742   First we merge in v1, so we get (t1, t2, v2, v3). v2 and v3 are shifted up.
1743   Tables from v1 need to have their table numbers altered (actually they do not
1744   since both old and new numbers are 0 and 1, but this is a special case).
1745   v2 and v3 are not merged in yet, so we delay pullout on them until they
1746   are merged. Conditions and expressions from the outer query are not resolved
1747   yet, so regular resolving will take of them later.
1748   Then we merge in v2, so we get (t1, t2, t1, t2, v3). The tables from this
1749   view gets numbers 2 and 3, and v3 gets number 4.
1750   Because v2 had a higher number than the tables from v1, the join nest
1751   representing v1 is unaffected. And v3 is still not merged, so the only
1752   join nest we need to consider is v2.
1753   Finally we merge in v3, and then we have tables (t1, t2, t1, t2, t1, t2),
1754   with numbers 0 through 5.
1755   Again, since v3 has higher number than any of the already merged in views,
1756   only this join nest needs the pullout.
1757 
1758   @param parent_select  Query block being merged into
1759   @param removed_select Query block that is removed (subquery)
1760   @param tr             Table object this pullout is applied to
1761   @param table_adjust   Number of positions that a derived table nest is
1762                         adjusted, used to fix up semi-join related fields.
1763                         Tables are adjusted from position N to N+table_adjust
1764 */
1765 
fix_tables_after_pullout(st_select_lex * parent_select,st_select_lex * removed_select,TABLE_LIST * tr,uint table_adjust)1766 static void fix_tables_after_pullout(st_select_lex *parent_select,
1767                                      st_select_lex *removed_select,
1768                                      TABLE_LIST *tr,
1769                                      uint table_adjust)
1770 {
1771   if (tr->is_merged())
1772   {
1773     // Update select list of merged derived tables:
1774     for (Field_translator *transl= tr->field_translation;
1775          transl < tr->field_translation_end;
1776          transl++)
1777     {
1778       assert(transl->item->fixed);
1779       transl->item->fix_after_pullout(parent_select, removed_select);
1780     }
1781     // Update used table info for the WHERE clause of the derived table
1782     assert(!tr->derived_where_cond ||
1783            tr->derived_where_cond->fixed);
1784     if (tr->derived_where_cond)
1785       tr->derived_where_cond->fix_after_pullout(parent_select,
1786                                                 removed_select);
1787   }
1788 
1789   /*
1790     If join_cond() is fixed, it contains a join condition from a subquery
1791     that has already been resolved. Call fix_after_pullout() to update
1792     used table information since table numbers may have changed.
1793     If join_cond() is not fixed, it contains a condition that was generated
1794     in the derived table merge operation, which will be fixed later.
1795     This condition may also contain a fixed part, but this is saved as
1796     derived_where_cond and is pulled out explicitly.
1797   */
1798   if (tr->join_cond() && tr->join_cond()->fixed)
1799       tr->join_cond()->fix_after_pullout(parent_select, removed_select);
1800 
1801   if (tr->nested_join)
1802   {
1803     // In case a derived table is merged-in, these fields need adjustment:
1804     tr->nested_join->sj_corr_tables<<= table_adjust;
1805     tr->nested_join->sj_depends_on<<= table_adjust;
1806 
1807     List_iterator<TABLE_LIST> it(tr->nested_join->join_list);
1808     TABLE_LIST *child;
1809     while ((child= it++))
1810       fix_tables_after_pullout(parent_select, removed_select, child,
1811                                table_adjust);
1812   }
1813 }
1814 
1815 
1816 /**
1817   Convert a subquery predicate of this query block into a TABLE_LIST semi-join
1818   nest.
1819 
1820   @param subq_pred   Subquery predicate to be converted.
1821                      This is either an IN, =ANY or EXISTS predicate.
1822 
1823   @retval FALSE OK
1824   @retval TRUE  Error
1825 
1826   @details
1827 
1828   The following transformations are performed:
1829 
1830   1. IN/=ANY predicates on the form:
1831 
1832   SELECT ...
1833   FROM ot1 ... otN
1834   WHERE (oe1, ... oeM) IN (SELECT ie1, ..., ieM)
1835                            FROM it1 ... itK
1836                           [WHERE inner-cond])
1837    [AND outer-cond]
1838   [GROUP BY ...] [HAVING ...] [ORDER BY ...]
1839 
1840   are transformed into:
1841 
1842   SELECT ...
1843   FROM (ot1 ... otN) SJ (it1 ... itK)
1844                      ON (oe1, ... oeM) = (ie1, ..., ieM)
1845                         [AND inner-cond]
1846   [WHERE outer-cond]
1847   [GROUP BY ...] [HAVING ...] [ORDER BY ...]
1848 
1849   Notice that the inner-cond may contain correlated and non-correlated
1850   expressions. Further transformations will analyze and break up such
1851   expressions.
1852 
1853   Prepared Statements: the transformation is permanent:
1854    - Changes in TABLE_LIST structures are naturally permanent
1855    - Item tree changes are performed on statement MEM_ROOT:
1856       = we activate statement MEM_ROOT
1857       = this function is called before the first fix_prepare_information call.
1858 
1859   This is intended because the criteria for subquery-to-sj conversion remain
1860   constant for the lifetime of the Prepared Statement.
1861 */
1862 bool
convert_subquery_to_semijoin(Item_exists_subselect * subq_pred)1863 SELECT_LEX::convert_subquery_to_semijoin(Item_exists_subselect *subq_pred)
1864 {
1865   TABLE_LIST *emb_tbl_nest= NULL;
1866   List<TABLE_LIST> *emb_join_list= &top_join_list;
1867   THD *const thd= subq_pred->unit->thd;
1868   DBUG_ENTER("convert_subquery_to_semijoin");
1869 
1870   assert(subq_pred->substype() == Item_subselect::IN_SUBS);
1871 
1872   bool outer_join= false;  // True if predicate is inner to an outer join
1873 
1874   /*
1875     Find out where to insert the semi-join nest and the generated condition.
1876 
1877     For t1 LEFT JOIN t2, embedding_join_nest will be t2.
1878     Note that t2 may be a simple table or may itself be a join nest
1879     (e.g. in the case t1 LEFT JOIN (t2 JOIN t3))
1880   */
1881   if (subq_pred->embedding_join_nest != NULL)
1882   {
1883     // Is this on inner side of an outer join?
1884     outer_join= subq_pred->embedding_join_nest->is_inner_table_of_outer_join();
1885 
1886     if (subq_pred->embedding_join_nest->nested_join)
1887     {
1888       /*
1889         We're dealing with
1890 
1891           ... [LEFT] JOIN  ( ... ) ON (subquery AND condition) ...
1892 
1893         The sj-nest will be inserted into the brackets nest.
1894       */
1895       emb_tbl_nest=  subq_pred->embedding_join_nest;
1896       emb_join_list= &emb_tbl_nest->nested_join->join_list;
1897     }
1898     else if (!subq_pred->embedding_join_nest->outer_join)
1899     {
1900       /*
1901         We're dealing with
1902 
1903           ... INNER JOIN tblX ON (subquery AND condition) ...
1904 
1905         The sj-nest will be tblX's "sibling", i.e. another child of its
1906         parent. This is ok because tblX is joined as an inner join.
1907       */
1908       emb_tbl_nest= subq_pred->embedding_join_nest->embedding;
1909       if (emb_tbl_nest)
1910         emb_join_list= &emb_tbl_nest->nested_join->join_list;
1911     }
1912     else if (!subq_pred->embedding_join_nest->nested_join)
1913     {
1914       TABLE_LIST *outer_tbl= subq_pred->embedding_join_nest;
1915       /*
1916         We're dealing with
1917 
1918           ... LEFT JOIN tbl ON (on_expr AND subq_pred) ...
1919 
1920         we'll need to convert it into:
1921 
1922           ... LEFT JOIN ( tbl SJ (subq_tables) ) ON (on_expr AND subq_pred) ...
1923                         |                      |
1924                         |<----- wrap_nest ---->|
1925 
1926         Q:  other subqueries may be pointing to this element. What to do?
1927         A1: simple solution: copy *subq_pred->embedding_join_nest= *parent_nest.
1928             But we'll need to fix other pointers.
1929         A2: Another way: have TABLE_LIST::next_ptr so the following
1930             subqueries know the table has been nested.
1931         A3: changes in the TABLE_LIST::outer_join will make everything work
1932             automatically.
1933       */
1934       TABLE_LIST *const wrap_nest=
1935         TABLE_LIST::new_nested_join(thd->mem_root, "(sj-wrap)",
1936                                     outer_tbl->embedding, outer_tbl->join_list,
1937                                     this);
1938       if (wrap_nest == NULL)
1939         DBUG_RETURN(true);
1940 
1941       wrap_nest->nested_join->join_list.push_back(outer_tbl);
1942 
1943       outer_tbl->embedding= wrap_nest;
1944       outer_tbl->join_list= &wrap_nest->nested_join->join_list;
1945 
1946       /*
1947         An important note, if this 'PREPARE stmt'.
1948         The FROM clause of the outer query now looks like
1949         CONCAT(original FROM clause of outer query, sj-nest).
1950         Given that the original FROM clause is reversed, this list is
1951         interpreted as "sj-nest is first".
1952         Thus, at a next execution, setup_natural_join_types() will decide that
1953         the name resolution context of the FROM clause should start at the
1954         first inner table in sj-nest.
1955         However, note that in the present function we do not change
1956         first_name_resolution_table (and friends) of sj-inner tables.
1957         So, at the next execution, name resolution for columns of
1958         outer-table columns is bound to fail (the first inner table does
1959         not have outer tables in its chain of resolution).
1960         Fortunately, Item_field::cached_table, which is set during resolution
1961         of 'PREPARE stmt', gives us the answer and avoids a failing search.
1962       */
1963 
1964       /*
1965         wrap_nest will take place of outer_tbl, so move the outer join flag
1966         and join condition.
1967       */
1968       wrap_nest->outer_join= outer_tbl->outer_join;
1969       outer_tbl->outer_join= 0;
1970 
1971       // There are item-rollback problems in this function: see bug#16926177
1972       wrap_nest->set_join_cond(outer_tbl->join_cond()->real_item());
1973       outer_tbl->set_join_cond(NULL);
1974 
1975       List_iterator<TABLE_LIST> li(*wrap_nest->join_list);
1976       TABLE_LIST *tbl;
1977       while ((tbl= li++))
1978       {
1979         if (tbl == outer_tbl)
1980         {
1981           li.replace(wrap_nest);
1982           break;
1983         }
1984       }
1985 
1986       /*
1987         outer_tbl is replaced by wrap_nest.
1988         For subselects, update embedding_join_nest to point to wrap_nest
1989         instead of outer_tbl
1990       */
1991       for (Item_exists_subselect **subquery= sj_candidates->begin();
1992            subquery < sj_candidates->end();
1993            subquery++)
1994       {
1995         if ((*subquery)->embedding_join_nest == outer_tbl)
1996           (*subquery)->embedding_join_nest= wrap_nest;
1997       }
1998 
1999       /*
2000         Ok now wrap_nest 'contains' outer_tbl and we're ready to add the
2001         semi-join nest into it
2002       */
2003       emb_join_list= &wrap_nest->nested_join->join_list;
2004       emb_tbl_nest=  wrap_nest;
2005     }
2006   }
2007 
2008   TABLE_LIST *const sj_nest=
2009     TABLE_LIST::new_nested_join(thd->mem_root, "(sj-nest)",
2010                                 emb_tbl_nest, emb_join_list, this);
2011   if (sj_nest == NULL)
2012     DBUG_RETURN(true);       /* purecov: inspected */
2013 
2014   NESTED_JOIN *const nested_join= sj_nest->nested_join;
2015 
2016   /* Nests do not participate in those 'chains', so: */
2017   /* sj_nest->next_leaf= sj_nest->next_local= sj_nest->next_global == NULL*/
2018   emb_join_list->push_back(sj_nest);
2019 
2020   /*
2021     Natural joins inside a semi-join nest were already processed when the
2022     subquery went through initial preparation.
2023   */
2024   sj_nest->nested_join->natural_join_processed= true;
2025   /*
2026     nested_join->used_tables and nested_join->not_null_tables are
2027     initialized in simplify_joins().
2028   */
2029 
2030   st_select_lex *const subq_select= subq_pred->unit->first_select();
2031 
2032   nested_join->query_block_id= subq_select->select_number;
2033 
2034   // Merge tables from underlying query block into this join nest
2035   if (sj_nest->merge_underlying_tables(subq_select))
2036     DBUG_RETURN(true);       /* purecov: inspected */
2037 
2038   /*
2039     Add tables from subquery at end of leaf table chain.
2040     (This also means that table map for parent query block tables are unchanged)
2041   */
2042   TABLE_LIST *tl;
2043   for (tl= leaf_tables; tl->next_leaf; tl= tl->next_leaf)
2044   {}
2045   tl->next_leaf= subq_select->leaf_tables;
2046 
2047   // Add tables from subquery at end of next_local chain.
2048   for (tl= get_table_list(); tl->next_local; tl= tl->next_local)
2049   {}
2050   tl->next_local= subq_select->get_table_list();
2051 
2052   // Note that subquery's tables are already in the next_global chain
2053 
2054   // Remove the original subquery predicate from the WHERE/ON
2055   // The subqueries were replaced for Item_int(1) earlier
2056   // @todo also reset the 'with_subselect' there.
2057 
2058   // Walk through child's tables and adjust table map
2059   uint table_no= leaf_table_count;
2060   for (tl= subq_select->leaf_tables; tl; tl= tl->next_leaf, table_no++)
2061     tl->set_tableno(table_no);
2062 
2063   // Adjust table and expression counts in parent query block:
2064   derived_table_count+= subq_select->derived_table_count;
2065   materialized_derived_table_count+=
2066     subq_select->materialized_derived_table_count;
2067   has_sj_nests|= subq_select->has_sj_nests;
2068   partitioned_table_count+= subq_select->partitioned_table_count;
2069   leaf_table_count+= subq_select->leaf_table_count;
2070   cond_count+= subq_select->cond_count;
2071   between_count+= subq_select->between_count;
2072 
2073   if (outer_join)
2074     propagate_nullability(&sj_nest->nested_join->join_list, true);
2075 
2076   nested_join->sj_outer_exprs.empty();
2077   nested_join->sj_inner_exprs.empty();
2078 
2079   /*
2080     @todo: Add similar conversion for subqueries other than IN.
2081   */
2082   if (subq_pred->substype() == Item_subselect::IN_SUBS)
2083   {
2084     Item_in_subselect *in_subq_pred= (Item_in_subselect *)subq_pred;
2085 
2086     assert(is_fixed_or_outer_ref(in_subq_pred->left_expr));
2087 
2088     in_subq_pred->exec_method= Item_exists_subselect::EXEC_SEMI_JOIN;
2089     /*
2090       sj_corr_tables is supposed to contain non-trivially correlated tables,
2091       but here it is set to contain all correlated tables.
2092       @todo: Add analysis step that assigns only the set of non-trivially
2093       correlated tables to sj_corr_tables.
2094     */
2095     nested_join->sj_corr_tables= subq_pred->used_tables();
2096 
2097     /*
2098       sj_depends_on contains the set of outer tables referred in the
2099       subquery's WHERE clause as well as tables referred in the IN predicate's
2100       left-hand side.
2101     */
2102     nested_join->sj_depends_on=  subq_pred->used_tables() |
2103                                  in_subq_pred->left_expr->used_tables();
2104 
2105     // Put the subquery's WHERE into semi-join's condition.
2106     Item *sj_cond= subq_select->where_cond();
2107 
2108     /*
2109     Create the IN-equalities and inject them into semi-join's ON condition.
2110     Additionally, for LooseScan strategy
2111      - Record the number of IN-equalities.
2112      - Create list of pointers to (oe1, ..., ieN). We'll need the list to
2113        see which of the expressions are bound and which are not (for those
2114        we'll produce a distinct stream of (ie_i1,...ie_ik).
2115 
2116        (TODO: can we just create a list of pointers and hope the expressions
2117        will not substitute themselves on fix_fields()? or we need to wrap
2118        them into Item_direct_view_refs and store pointers to those. The
2119        pointers to Item_direct_view_refs are guaranteed to be stable as
2120        Item_direct_view_refs doesn't substitute itself with anything in
2121        Item_direct_view_ref::fix_fields.
2122 
2123     We have a special case for IN predicates with a scalar subquery or a
2124     row subquery in the predicand (left operand), such as this:
2125        (SELECT 1,2 FROM t1) IN (SELECT x,y FROM t2)
2126     We cannot make the join condition 1=x AND 2=y, since that might evaluate
2127     to TRUE even if t1 is empty. Instead make the join condition
2128     (SELECT 1,2 FROM t1) = (x,y) in this case.
2129 
2130     */
2131 
2132     Item_subselect *left_subquery=
2133       (in_subq_pred->left_expr->type() == Item::SUBSELECT_ITEM) ?
2134       static_cast<Item_subselect *>(in_subq_pred->left_expr) : NULL;
2135 
2136     if (left_subquery &&
2137         (left_subquery->substype() == Item_subselect::SINGLEROW_SUBS))
2138     {
2139       List<Item> ref_list;
2140       Item *header= subq_select->ref_pointer_array[0];
2141       for (uint i= 1; i < in_subq_pred->left_expr->cols(); i++)
2142       {
2143         ref_list.push_back(subq_select->ref_pointer_array[i]);
2144       }
2145 
2146       Item_row *right_expr= new Item_row(header, ref_list);
2147       if (!right_expr)
2148         DBUG_RETURN(true);      /* purecov: inspected */
2149 
2150       nested_join->sj_outer_exprs.push_back(in_subq_pred->left_expr);
2151       nested_join->sj_inner_exprs.push_back(right_expr);
2152       Item_func_eq *item_eq=
2153         new Item_func_eq(in_subq_pred->left_expr,
2154                          right_expr);
2155       if (item_eq == NULL)
2156         DBUG_RETURN(true);      /* purecov: inspected */
2157 
2158       sj_cond= and_items(sj_cond, item_eq);
2159       if (sj_cond == NULL)
2160         DBUG_RETURN(true);      /* purecov: inspected */
2161     }
2162     else
2163     {
2164       for (uint i= 0; i < in_subq_pred->left_expr->cols(); i++)
2165       {
2166         Item *const li= in_subq_pred->left_expr->element_index(i);
2167         nested_join->sj_outer_exprs.push_back(li);
2168         nested_join->sj_inner_exprs.push_back(subq_select->ref_pointer_array[i]);
2169 
2170         Item_func_eq *item_eq=
2171           new Item_func_eq(li, subq_select->ref_pointer_array[i]);
2172 
2173         if (item_eq == NULL)
2174           DBUG_RETURN(true);      /* purecov: inspected */
2175 
2176         /*
2177           li [left_expr->element_index(i)] can be a transient Item_outer_ref,
2178           whose usage has already been marked for rollback, but we need to roll
2179           back this location (inside Item_func_eq) in stead, since this is the
2180           place that matters after this semijoin transformation. arguments()
2181           gets the address of li as stored in item_eq ("place").
2182         */
2183         thd->replace_rollback_place(item_eq->arguments());
2184 
2185         sj_cond= and_items(sj_cond, item_eq);
2186         if (sj_cond == NULL)
2187           DBUG_RETURN(true);      /* purecov: inspected */
2188       }
2189     }
2190     // Fix the created equality and AND
2191 
2192     Opt_trace_array sj_on_trace(&thd->opt_trace,
2193                                 "evaluating_constant_semijoin_conditions");
2194     sj_cond->top_level_item();
2195     if (sj_cond->fix_fields(thd, &sj_cond))
2196       DBUG_RETURN(true);          /* purecov: inspected */
2197 
2198     // Attach semi-join condition to semi-join nest
2199     sj_nest->set_sj_cond(sj_cond);
2200   }
2201 
2202   // Unlink the subquery's query expression:
2203   subq_select->master_unit()->exclude_level();
2204 
2205   // Merge subquery's name resolution contexts into parent's
2206   merge_contexts(subq_select);
2207 
2208   repoint_contexts_of_join_nests(subq_select->top_join_list);
2209 
2210   // Update table map for the semi-join condition
2211   sj_nest->sj_cond()->fix_after_pullout(this, subq_select);
2212 
2213   // Update table map for semi-join nest's WHERE condition and join conditions
2214   fix_tables_after_pullout(this, subq_select, sj_nest, 0);
2215 
2216   //TODO fix QT_
2217   DBUG_EXECUTE("where",
2218                print_where(sj_nest->sj_cond(),"SJ-COND", QT_ORDINARY););
2219 
2220   if (emb_tbl_nest)
2221   {
2222     // Inject semi-join condition into parent's join condition
2223     emb_tbl_nest->set_join_cond(and_items(emb_tbl_nest->join_cond(),
2224                                           sj_nest->sj_cond()));
2225     if (emb_tbl_nest->join_cond() == NULL)
2226       DBUG_RETURN(true);
2227     emb_tbl_nest->join_cond()->top_level_item();
2228     if (!emb_tbl_nest->join_cond()->fixed &&
2229         emb_tbl_nest->join_cond()->fix_fields(thd,
2230                                               emb_tbl_nest->join_cond_ref()))
2231       DBUG_RETURN(true);
2232   }
2233   else
2234   {
2235     // Inject semi-join condition into parent's WHERE condition
2236     m_where_cond= and_items(m_where_cond, sj_nest->sj_cond());
2237     if (m_where_cond == NULL)
2238       DBUG_RETURN(true);
2239     m_where_cond->top_level_item();
2240     if (m_where_cond->fix_fields(thd, &m_where_cond))
2241       DBUG_RETURN(true);
2242   }
2243 
2244   if (subq_select->ftfunc_list->elements &&
2245       add_ftfunc_list(subq_select->ftfunc_list))
2246     DBUG_RETURN(true);        /* purecov: inspected */
2247 
2248   // This query block has semi-join nests
2249   has_sj_nests= true;
2250 
2251   DBUG_RETURN(false);
2252 }
2253 
2254 
2255 /**
2256   Merge a derived table or view into a query block.
2257   If some constraint prevents the derived table from being merged then do
2258   nothing, which means the table will be prepared for materialization later.
2259 
2260   After this call, check is_merged() to see if the table was really merged.
2261 
2262   @param thd           Thread handler
2263   @param derived_table Derived table which is to be merged.
2264 
2265   @return false if successful, true if error
2266 */
2267 
merge_derived(THD * thd,TABLE_LIST * derived_table)2268 bool SELECT_LEX::merge_derived(THD *thd, TABLE_LIST *derived_table)
2269 {
2270   DBUG_ENTER("SELECT_LEX::merge_derived");
2271 
2272   if (!derived_table->is_view_or_derived() || derived_table->is_merged())
2273     DBUG_RETURN(false);
2274 
2275   SELECT_LEX_UNIT *const derived_unit= derived_table->derived_unit();
2276 
2277   // A derived table must be prepared before we can merge it
2278   assert(derived_unit->is_prepared());
2279 
2280   LEX *const lex= parent_lex;
2281 
2282   // Check whether the outer query allows merged views
2283   if ((master_unit() == lex->unit &&
2284        !lex->can_use_merged()) ||
2285       lex->can_not_use_merged())
2286     DBUG_RETURN(false);
2287 
2288   /*
2289     @todo: The implementation of LEX::can_use_merged() currently avoids
2290            merging of views that are contained in other views if
2291            can_use_merged() returns false.
2292   */
2293   // Check whether derived table is mergeable, and directives allow merging
2294   if (!derived_unit->is_mergeable() ||
2295       derived_table->algorithm == VIEW_ALGORITHM_TEMPTABLE ||
2296       (!thd->optimizer_switch_flag(OPTIMIZER_SWITCH_DERIVED_MERGE) &&
2297        derived_table->algorithm != VIEW_ALGORITHM_MERGE))
2298     DBUG_RETURN(false);
2299 
2300   SELECT_LEX *const derived_select= derived_unit->first_select();
2301   /*
2302     If STRAIGHT_JOIN is specified, it is not valid to merge in a query block
2303     that contains semi-join nests
2304   */
2305   if ((active_options() & SELECT_STRAIGHT_JOIN) && derived_select->has_sj_nests)
2306     DBUG_RETURN(false);
2307 
2308   // Check that we have room for the merged tables in the table map:
2309   if (leaf_table_count + derived_select->leaf_table_count - 1 > MAX_TABLES)
2310     DBUG_RETURN(false);
2311 
2312   derived_table->set_merged();
2313 
2314   DBUG_PRINT("info", ("algorithm: MERGE"));
2315 
2316   Opt_trace_context *const trace= &thd->opt_trace;
2317   Opt_trace_object trace_wrapper(trace);
2318   Opt_trace_object trace_derived(trace,
2319                                  derived_table->is_view() ? "view" : "derived");
2320   trace_derived.add_utf8_table(derived_table).
2321     add("select#", derived_select->select_number).
2322     add("merged", true);
2323 
2324   Prepared_stmt_arena_holder ps_arena_holder(thd);
2325 
2326   // Save offset for table number adjustment
2327   uint table_adjust= derived_table->tableno();
2328 
2329   // Set up permanent list of underlying tables of a merged view
2330   derived_table->merge_underlying_list= derived_select->get_table_list();
2331 
2332   /**
2333     A view is updatable if any underlying table is updatable.
2334     A view is insertable-into if all underlying tables are insertable.
2335     A view is not updatable nor insertable if it contains an outer join
2336     @see mysql_register_view()
2337   */
2338   if (derived_table->is_view())
2339   {
2340     bool updatable= false;
2341     bool insertable= true;
2342     bool outer_joined= false;
2343     for (TABLE_LIST *tr= derived_table->merge_underlying_list;
2344          tr;
2345          tr= tr->next_local)
2346     {
2347       updatable|= tr->is_updatable();
2348       insertable&= tr->is_insertable();
2349       outer_joined|= tr->is_inner_table_of_outer_join();
2350     }
2351     updatable&= !outer_joined;
2352     insertable&= !outer_joined;
2353     if (updatable)
2354       derived_table->set_updatable();
2355     if (insertable)
2356       derived_table->set_insertable();
2357   }
2358   // Add a nested join object to the derived table object
2359   if (!(derived_table->nested_join=
2360        (NESTED_JOIN *) thd->mem_calloc(sizeof(NESTED_JOIN))))
2361     DBUG_RETURN(true);        /* purecov: inspected */
2362   derived_table->nested_join->join_list.empty();//Should be done by constructor!
2363 
2364   // Merge tables from underlying query block into this join nest
2365   if (derived_table->merge_underlying_tables(derived_select))
2366     DBUG_RETURN(true);       /* purecov: inspected */
2367 
2368   // Replace derived table in leaf table list with underlying tables:
2369   for (TABLE_LIST **tl= &leaf_tables; *tl; tl= &(*tl)->next_leaf)
2370   {
2371     if (*tl == derived_table)
2372     {
2373       for (TABLE_LIST *leaf= derived_select->leaf_tables; leaf;
2374            leaf= leaf->next_leaf)
2375       {
2376         if (leaf->next_leaf == NULL)
2377         {
2378           leaf->next_leaf= (*tl)->next_leaf;
2379           break;
2380         }
2381       }
2382       *tl= derived_select->leaf_tables;
2383       break;
2384     }
2385   }
2386 
2387   leaf_table_count+= (derived_select->leaf_table_count - 1);
2388   derived_table_count+= derived_select->derived_table_count;
2389   materialized_derived_table_count+=
2390     derived_select->materialized_derived_table_count;
2391   has_sj_nests|= derived_select->has_sj_nests;
2392   partitioned_table_count+= derived_select->partitioned_table_count;
2393   cond_count+= derived_select->cond_count;
2394   between_count+= derived_select->between_count;
2395 
2396   // Propagate schema table indication:
2397   // @todo: Add to BASE options instead
2398   if (derived_select->active_options() & OPTION_SCHEMA_TABLE)
2399     add_base_options(OPTION_SCHEMA_TABLE);
2400 
2401   // Propagate nullability for derived tables within outer joins:
2402   if (derived_table->is_inner_table_of_outer_join())
2403     propagate_nullability(&derived_table->nested_join->join_list, true);
2404 
2405   select_n_having_items+= derived_select->select_n_having_items;
2406 
2407   // Merge the WHERE clause into the outer query block
2408   if (derived_table->merge_where(thd))
2409     DBUG_RETURN(true);        /* purecov: inspected */
2410 
2411   if (derived_table->create_field_translation(thd))
2412     DBUG_RETURN(true);        /* purecov: inspected */
2413 
2414   // Exclude the derived table query expression from query graph.
2415   derived_unit->exclude_level();
2416 
2417   // Don't try to access it:
2418   derived_table->set_derived_unit((SELECT_LEX_UNIT *)1);
2419 
2420   // Merge subquery's name resolution contexts into parent's
2421   merge_contexts(derived_select);
2422 
2423   repoint_contexts_of_join_nests(derived_select->top_join_list);
2424 
2425   // Leaf tables have been shuffled, so update table numbers for them
2426   remap_tables(thd);
2427 
2428   // Update table info of referenced expressions after query block is merged
2429   fix_tables_after_pullout(this, derived_select, derived_table, table_adjust);
2430 
2431   if (derived_select->is_ordered())
2432   {
2433     /*
2434       An ORDER BY clause is moved to an outer query block
2435       - if the outer query block allows ordering, and
2436       - that refers to this view/derived table only, and
2437       - is not part of a UNION, and
2438       - may have a WHERE clause but is not grouped or aggregated and is not
2439         itself ordered.
2440      Otherwise the ORDER BY clause is ignored.
2441 
2442      Only SELECT statements and single-table UPDATE and DELETE statements
2443      allow ordering.
2444 
2445      Up to version 5.6 included, ORDER BY was unconditionally merged.
2446      Currently we only merge in the simple case above, which ensures
2447      backward compatibility for most reasonable use cases.
2448 
2449      Note that table numbers in order_list do not need updating, since
2450      the outer query contains only one table reference.
2451     */
2452     // LIMIT currently blocks derived table merge
2453     assert(!derived_select->has_limit());
2454 
2455     if ((lex->sql_command == SQLCOM_SELECT ||
2456          lex->sql_command == SQLCOM_UPDATE ||
2457          lex->sql_command == SQLCOM_DELETE) &&
2458         !(master_unit()->is_union() ||
2459           is_grouped() ||
2460           is_distinct() ||
2461           is_ordered() ||
2462           get_table_list()->next_local != NULL))
2463     {
2464       order_list.push_back(&derived_select->order_list);
2465       /*
2466         If at outer-most level (not within another derived table), ensure
2467         the ordering columns are marked in read_set, since columns selected
2468         from derived tables are not marked in initial resolving.
2469       */
2470       if (!thd->derived_tables_processing)
2471       {
2472         Mark_field mf(thd->mark_used_columns);
2473         for (ORDER *o = derived_select->order_list.first; o != NULL;
2474              o = o->next)
2475           o->item[0]->walk(&Item::mark_field_in_map, Item::WALK_POSTFIX,
2476                            pointer_cast<uchar *>(&mf));
2477       }
2478     }
2479     else
2480     {
2481       derived_select->empty_order_list(this);
2482       trace_derived.add_alnum("transformations_to_derived_table",
2483                               "removed_ordering");
2484     }
2485   }
2486 
2487   // Add any full-text functions from derived table into outer query
2488   if (derived_select->ftfunc_list->elements &&
2489       add_ftfunc_list(derived_select->ftfunc_list))
2490     DBUG_RETURN(true);        /* purecov: inspected */
2491 
2492   DBUG_RETURN(false);
2493 }
2494 
2495 
2496 /**
2497    Destructively replaces a sub-condition inside a condition tree. The
2498    parse tree is also altered.
2499 
2500    @note Because of current requirements for semijoin flattening, we do not
2501    need to recurse here, hence this function will only examine the top-level
2502    AND conditions. (see SELECT_LEX::prepare, comment starting with "Check if
2503    the subquery predicate can be executed via materialization".)
2504 
2505    @param thd  thread handler
2506 
2507    @param tree Must be the handle to the top level condition. This is needed
2508    when the top-level condition changes.
2509 
2510    @param old_cond The condition to be replaced.
2511 
2512    @param new_cond The condition to be substituted.
2513 
2514    @param do_fix_fields If true, Item::fix_fields(THD*, Item**) is called for
2515    the new condition.
2516 
2517    @return error status
2518 
2519    @retval true If there was an error.
2520    @retval false If successful.
2521 */
2522 
replace_subcondition(THD * thd,Item ** tree,Item * old_cond,Item * new_cond,bool do_fix_fields)2523 static bool replace_subcondition(THD *thd, Item **tree,
2524                                  Item *old_cond, Item *new_cond,
2525                                  bool do_fix_fields)
2526 {
2527   if (*tree == old_cond)
2528   {
2529     *tree= new_cond;
2530     if (do_fix_fields && new_cond->fix_fields(thd, tree))
2531       return TRUE;
2532     return FALSE;
2533   }
2534   else if ((*tree)->type() == Item::COND_ITEM)
2535   {
2536     List_iterator<Item> li(*((Item_cond*)(*tree))->argument_list());
2537     Item *item;
2538     while ((item= li++))
2539     {
2540       if (item == old_cond)
2541       {
2542         li.replace(new_cond);
2543         if (do_fix_fields && new_cond->fix_fields(thd, li.ref()))
2544           return TRUE;
2545         return FALSE;
2546       }
2547     }
2548   }
2549   else
2550     // If we came here it means there were an error during prerequisites check.
2551     assert(FALSE);
2552 
2553   return TRUE;
2554 }
2555 
2556 
2557 /*
2558   Convert semi-join subquery predicates into semi-join join nests
2559 
2560   DESCRIPTION
2561 
2562     Convert candidate subquery predicates into semi-join join nests. This
2563     transformation is performed once in query lifetime and is irreversible.
2564 
2565     Conversion of one subquery predicate
2566     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2567     We start with a join that has a semi-join subquery:
2568 
2569       SELECT ...
2570       FROM ot, ...
2571       WHERE oe IN (SELECT ie FROM it1 ... itN WHERE subq_where) AND outer_where
2572 
2573     and convert it into a semi-join nest:
2574 
2575       SELECT ...
2576       FROM ot SEMI JOIN (it1 ... itN), ...
2577       WHERE outer_where AND subq_where AND oe=ie
2578 
2579     that is, in order to do the conversion, we need to
2580 
2581      * Create the "SEMI JOIN (it1 .. itN)" part and add it into the parent
2582        query's FROM structure.
2583      * Add "AND subq_where AND oe=ie" into parent query's WHERE (or ON if
2584        the subquery predicate was in an ON expression)
2585      * Remove the subquery predicate from the parent query's WHERE
2586 
2587     Considerations when converting many predicates
2588     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2589     A join may have at most MAX_TABLES tables. This may prevent us from
2590     flattening all subqueries when the total number of tables in parent and
2591     child selects exceeds MAX_TABLES. In addition, one slot is reserved per
2592     semi-join nest, in case the subquery needs to be materialized in a
2593     temporary table.
2594     We deal with this problem by flattening children's subqueries first and
2595     then using a heuristic rule to determine each subquery predicate's
2596     "priority".
2597 
2598   RETURN
2599     FALSE  OK
2600     TRUE   Error
2601 */
flatten_subqueries()2602 bool SELECT_LEX::flatten_subqueries()
2603 {
2604   DBUG_ENTER("flatten_subqueries");
2605 
2606   if (sj_candidates->empty())
2607     DBUG_RETURN(FALSE);
2608 
2609   Item_exists_subselect **subq,
2610     **subq_begin= sj_candidates->begin(),
2611     **subq_end= sj_candidates->end();
2612 
2613   THD *const thd= (*subq_begin)->unit->thd;
2614   Opt_trace_context *const trace= &thd->opt_trace;
2615 
2616   /*
2617     Semijoin flattening is bottom-up. Indeed, we have this execution flow,
2618     for SELECT#1 WHERE X IN (SELECT #2 WHERE Y IN (SELECT#3)) :
2619 
2620     SELECT_LEX::prepare() (select#1)
2621        -> fix_fields() on IN condition
2622            -> SELECT_LEX::prepare() on subquery (select#2)
2623                -> fix_fields() on IN condition
2624                     -> SELECT_LEX::prepare() on subquery (select#3)
2625                     <- SELECT_LEX::prepare()
2626                <- fix_fields()
2627                -> flatten_subqueries: merge #3 in #2
2628                <- flatten_subqueries
2629            <- SELECT_LEX::prepare()
2630        <- fix_fields()
2631        -> flatten_subqueries: merge #2 in #1
2632 
2633     Note that flattening of #(N) is done by its parent JOIN#(N-1), because
2634     there are cases where flattening is not possible and only the parent can
2635     know.
2636    */
2637   for (subq= subq_begin; subq < subq_end; subq++)
2638   {
2639     /*
2640       Currently, we only support transformation of IN subqueries.
2641     */
2642     assert((*subq)->substype() == Item_subselect::IN_SUBS);
2643 
2644     st_select_lex *child_select= (*subq)->unit->first_select();
2645 
2646     // Check that we proceeded bottom-up
2647     assert(child_select->sj_candidates == NULL);
2648 
2649     (*subq)->sj_convert_priority=
2650       (((*subq)->unit->uncacheable & UNCACHEABLE_DEPENDENT) ? MAX_TABLES : 0) +
2651       child_select->leaf_table_count;
2652   }
2653 
2654   /*
2655     2. Pick which subqueries to convert:
2656       sort the subquery array
2657       - prefer correlated subqueries over uncorrelated;
2658       - prefer subqueries that have greater number of outer tables;
2659   */
2660   my_qsort(subq_begin,
2661            sj_candidates->size(), sj_candidates->element_size(),
2662            reinterpret_cast<qsort_cmp>(subq_sj_candidate_cmp));
2663 
2664   // A permanent transformation is going to start, so:
2665   Prepared_stmt_arena_holder ps_arena_holder(thd);
2666 
2667   // #tables-in-parent-query + #tables-in-subquery + sj nests <= MAX_TABLES
2668   /* Replace all subqueries to be flattened with Item_int(1) */
2669 
2670   uint table_count= leaf_table_count;
2671   for (subq= subq_begin; subq < subq_end; subq++)
2672   {
2673     // Add the tables in the subquery nest plus one in case of materialization:
2674     const uint tables_added=
2675       (*subq)->unit->first_select()->leaf_table_count + 1;
2676     (*subq)->sj_chosen= table_count + tables_added <= MAX_TABLES;
2677 
2678     if (!(*subq)->sj_chosen)
2679       continue;
2680 
2681     table_count+= tables_added;
2682 
2683     // In WHERE/ON of parent query, replace IN(subq) with "1" (<=>TRUE)
2684     Item **tree= ((*subq)->embedding_join_nest == NULL) ?
2685                  &m_where_cond :
2686                 (*subq)->embedding_join_nest->join_cond_ref();
2687     if (replace_subcondition(thd, tree, *subq, new Item_int(1), FALSE))
2688       DBUG_RETURN(TRUE); /* purecov: inspected */
2689   }
2690 
2691   for (subq= subq_begin; subq < subq_end; subq++)
2692   {
2693     if (!(*subq)->sj_chosen)
2694       continue;
2695 
2696     OPT_TRACE_TRANSFORM(trace, oto0, oto1,
2697                         (*subq)->unit->first_select()->select_number,
2698                         "IN (SELECT)", "semijoin");
2699     oto1.add("chosen", true);
2700     if (convert_subquery_to_semijoin(*subq))
2701       DBUG_RETURN(TRUE);
2702   }
2703   /*
2704     3. Finalize the subqueries that we did not convert,
2705        ie. perform IN->EXISTS rewrite.
2706   */
2707   for (subq= subq_begin; subq < subq_end; subq++)
2708   {
2709     if ((*subq)->sj_chosen)
2710       continue;
2711     {
2712       OPT_TRACE_TRANSFORM(trace, oto0, oto1,
2713                           (*subq)->unit->first_select()->select_number,
2714                           "IN (SELECT)", "semijoin");
2715       oto1.add("chosen", false);
2716     }
2717     Item_subselect::trans_res res;
2718     (*subq)->changed= 0;
2719     (*subq)->fixed= 0;
2720 
2721     SELECT_LEX *save_select_lex= thd->lex->current_select();
2722     thd->lex->set_current_select((*subq)->unit->first_select());
2723 
2724     // This is the only part of the function which uses a JOIN.
2725     res= (*subq)->select_transformer((*subq)->unit->first_select());
2726 
2727     thd->lex->set_current_select(save_select_lex);
2728 
2729     if (res == Item_subselect::RES_ERROR)
2730       DBUG_RETURN(TRUE);
2731 
2732     (*subq)->changed= 1;
2733     (*subq)->fixed= 1;
2734 
2735     Item *substitute= (*subq)->substitution;
2736     const bool do_fix_fields= !(*subq)->substitution->fixed;
2737     const bool subquery_in_join_clause= (*subq)->embedding_join_nest != NULL;
2738 
2739     Item **tree= subquery_in_join_clause ?
2740       ((*subq)->embedding_join_nest->join_cond_ref()) : &m_where_cond;
2741     if (replace_subcondition(thd, tree, *subq, substitute, do_fix_fields))
2742       DBUG_RETURN(TRUE);
2743     (*subq)->substitution= NULL;
2744   }
2745 
2746   sj_candidates->clear();
2747   DBUG_RETURN(FALSE);
2748 }
2749 
is_in_select_list(Item * cand)2750 bool SELECT_LEX::is_in_select_list(Item *cand) {
2751   List_iterator<Item> li(fields_list);
2752   Item *item;
2753   while ((item = li++)) {
2754     // Use a walker to detect if cand is present in this select item
2755 
2756     if (item->walk(&Item::find_item_processor, Item::WALK_SUBQUERY_POSTFIX,
2757                    pointer_cast<uchar *>(cand)))
2758       return true;
2759   }
2760   return false;
2761 }
2762 
2763 /**
2764   Propagate nullability into inner tables of outer join operation
2765 
2766   @param tables: List of tables and join nests, start at top_join_list
2767   @param nullable: true: Set all underlying tables as nullable
2768 */
propagate_nullability(List<TABLE_LIST> * tables,bool nullable)2769 static void propagate_nullability(List<TABLE_LIST> *tables, bool nullable)
2770 {
2771   List_iterator<TABLE_LIST> li(*tables);
2772   TABLE_LIST *tr;
2773 
2774   while ((tr= li++))
2775   {
2776     if (tr->table && !tr->table->is_nullable() && (nullable || tr->outer_join))
2777       tr->table->set_nullable();
2778     if (tr->nested_join == NULL)
2779       continue;
2780     propagate_nullability(&tr->nested_join->join_list,
2781                           nullable || tr->outer_join);
2782   }
2783 }
2784 
2785 
2786 /**
2787   Propagate exclusion from unique table check into all subqueries belonging
2788   to this query block.
2789 
2790   This function can be applied to all subqueries of a materialized derived
2791   table or view.
2792 */
2793 
propagate_unique_test_exclusion()2794 void st_select_lex::propagate_unique_test_exclusion()
2795 {
2796   for (SELECT_LEX_UNIT *unit= first_inner_unit(); unit; unit= unit->next_unit())
2797     for (SELECT_LEX *sl= unit->first_select(); sl; sl= sl->next_select())
2798       sl->propagate_unique_test_exclusion();
2799 
2800   exclude_from_table_unique_test= true;
2801 }
2802 
2803 
2804 /**
2805   Add a list of full-text function elements into a query block.
2806 
2807   @param ftfuncs   List of full-text function elements to add.
2808 
2809   @returns false if success, true if error
2810 */
2811 
add_ftfunc_list(List<Item_func_match> * ftfuncs)2812 bool SELECT_LEX::add_ftfunc_list(List<Item_func_match> *ftfuncs)
2813 {
2814   Item_func_match *ifm;
2815   List_iterator_fast<Item_func_match> li(*ftfuncs);
2816   while ((ifm= li++))
2817   {
2818     if (ftfunc_list->push_back(ifm))
2819       return true;        /* purecov: inspected */
2820   }
2821   return false;
2822 }
2823 
2824 
2825 /**
2826    Go through a list of tables and join nests, recursively, and repoint
2827    its select_lex pointer.
2828 
2829    @param  join_list  List of tables and join nests
2830 */
repoint_contexts_of_join_nests(List<TABLE_LIST> join_list)2831 void SELECT_LEX::repoint_contexts_of_join_nests(List<TABLE_LIST> join_list)
2832 {
2833   List_iterator_fast<TABLE_LIST> ti(join_list);
2834   TABLE_LIST *tbl;
2835   while ((tbl= ti++))
2836   {
2837     tbl->select_lex= this;
2838     if (tbl->nested_join)
2839       repoint_contexts_of_join_nests(tbl->nested_join->join_list);
2840   }
2841 }
2842 
2843 
2844 /**
2845   Merge name resolution context objects belonging to an inner subquery
2846   to parent query block.
2847   Update all context objects to have this base query block.
2848   Used when a subquery's query block is merged into its parent.
2849 
2850   @param inner  Subquery for which context objects are to be merged.
2851 */
merge_contexts(SELECT_LEX * inner)2852 void SELECT_LEX::merge_contexts(SELECT_LEX *inner)
2853 {
2854   for (Name_resolution_context *ctx= inner->first_context;
2855        ctx != NULL;
2856        ctx= ctx->next_context)
2857   {
2858     ctx->select_lex= this;
2859     if (ctx->next_context == NULL)
2860     {
2861       ctx->next_context= first_context;
2862       first_context= inner->first_context;
2863       inner->first_context= NULL;
2864       break;
2865     }
2866   }
2867 }
2868 
2869 
2870 /**
2871   Fix fields referenced from inner query blocks.
2872 
2873   @param thd               Thread handle
2874 
2875   @details
2876     The function serves 3 purposes
2877 
2878     - adds fields referenced from inner query blocks to the current select list
2879 
2880     - Decides which class to use to reference the items (Item_ref or
2881       Item_direct_ref)
2882 
2883     - fixes references (Item_ref objects) to these fields.
2884 
2885     If a field isn't already on the select list and the ref_ptrs array
2886     is provided then it is added to the all_fields list and the pointer to
2887     it is saved in the ref_ptrs array.
2888 
2889     The class to access the outer field is determined by the following rules:
2890 
2891     -#. If the outer field isn't used under an aggregate function then the
2892         Item_ref class should be used.
2893 
2894     -#. If the outer field is used under an aggregate function and this
2895         function is, in turn, aggregated in the query block where the outer
2896         field was resolved or some query nested therein, then the
2897         Item_direct_ref class should be used. Also it should be used if we are
2898         grouping by a subquery containing the outer field.
2899 
2900     The resolution is done here and not at the fix_fields() stage as
2901     it can be done only after aggregate functions are fixed and pulled up to
2902     selects where they are to be aggregated.
2903 
2904     When the class is chosen it substitutes the original field in the
2905     Item_outer_ref object.
2906 
2907     After this we proceed with fixing references (Item_outer_ref objects) to
2908     this field from inner subqueries.
2909 
2910   @return false if success, true if error
2911  */
2912 
fix_inner_refs(THD * thd)2913 bool SELECT_LEX::fix_inner_refs(THD *thd)
2914 {
2915   Item_outer_ref *ref;
2916 
2917   List_iterator<Item_outer_ref> ref_it(inner_refs_list);
2918   while ((ref= ref_it++))
2919   {
2920     bool direct_ref= false;
2921     Item *item= ref->outer_ref;
2922     Item **item_ref= ref->ref;
2923     /*
2924       TODO: this field item already might be present in the select list.
2925       In this case instead of adding new field item we could use an
2926       existing one. The change will lead to less operations for copying fields,
2927       smaller temporary tables and less data passed through filesort.
2928     */
2929     if (!ref_ptrs.is_null() && !ref->found_in_select_list)
2930     {
2931       /*
2932         Add the field item to the select list of the current select.
2933         If it's needed reset each Item_ref item that refers this field with
2934         a new reference taken from ref_pointer_array.
2935       */
2936       item_ref= add_hidden_item(item);
2937     }
2938 
2939     if (ref->in_sum_func)
2940     {
2941       if (ref->in_sum_func->nest_level > nest_level)
2942         direct_ref= true;
2943       else
2944       {
2945         for (Item_sum *sum_func= ref->in_sum_func;
2946              sum_func && sum_func->aggr_level >= nest_level;
2947              sum_func= sum_func->in_sum_func)
2948         {
2949           if (sum_func->aggr_level == nest_level)
2950           {
2951             direct_ref= true;
2952             break;
2953           }
2954         }
2955       }
2956     }
2957     else
2958     {
2959       /*
2960         Check if GROUP BY item trees contain the outer ref:
2961         in this case we have to use Item_direct_ref instead of Item_ref.
2962       */
2963       for (ORDER *group= group_list.first; group; group= group->next)
2964       {
2965         if ((*group->item)->walk(&Item::find_item_processor, walk_subquery,
2966                                  (uchar *) ref))
2967         {
2968           direct_ref= true;
2969           break;
2970         }
2971       }
2972     }
2973     Item_ref *const new_ref= direct_ref ?
2974               new Item_direct_ref(ref->context, item_ref, ref->table_name,
2975                                   ref->field_name, ref->is_alias_of_expr()) :
2976               new Item_ref(ref->context, item_ref, ref->table_name,
2977                            ref->field_name, ref->is_alias_of_expr());
2978     if (!new_ref)
2979       return true;         /* purecov: inspected */
2980     ref->outer_ref= new_ref;
2981     ref->ref= &ref->outer_ref;
2982 
2983     if (!ref->fixed && ref->fix_fields(thd, 0))
2984       return true;         /* purecov: inspected */
2985     thd->lex->used_tables|= item->used_tables();
2986     select_list_tables|= item->used_tables();
2987   }
2988   return false;
2989 }
2990 
2991 
2992 /**
2993    Since LIMIT is not supported for table subquery predicates
2994    (IN/ALL/EXISTS/etc), the following clauses are redundant for
2995    subqueries:
2996 
2997    ORDER BY
2998    DISTINCT
2999    GROUP BY   if there are no aggregate functions and no HAVING clause
3000 
3001    This removal is permanent. Thus, it only makes sense to call this function
3002    for regular queries and on first execution of SP/PS
3003 
3004    @param thd               thread handler
3005    @param hidden_group_field_count Number of hidden group fields added
3006                             by setup_group().
3007 */
3008 
3009 
remove_redundant_subquery_clauses(THD * thd,int hidden_group_field_count)3010 void SELECT_LEX::remove_redundant_subquery_clauses(THD *thd,
3011                                                    int hidden_group_field_count)
3012 {
3013   Item_subselect *subq_predicate= master_unit()->item;
3014   /*
3015     The removal should happen for IN, ALL, ANY and EXISTS subqueries,
3016     which means all but single row subqueries. Example single row
3017     subqueries:
3018        a) SELECT * FROM t1 WHERE t1.a = (<single row subquery>)
3019        b) SELECT a, (<single row subquery) FROM t1
3020    */
3021   if (subq_predicate->substype() == Item_subselect::SINGLEROW_SUBS)
3022     return;
3023 
3024   // A subquery that is not single row should be one of IN/ALL/ANY/EXISTS.
3025   assert (subq_predicate->substype() == Item_subselect::EXISTS_SUBS ||
3026           subq_predicate->substype() == Item_subselect::IN_SUBS     ||
3027           subq_predicate->substype() == Item_subselect::ALL_SUBS    ||
3028           subq_predicate->substype() == Item_subselect::ANY_SUBS);
3029 
3030   enum change
3031   {
3032     REMOVE_NONE=0,
3033     REMOVE_ORDER= 1 << 0,
3034     REMOVE_DISTINCT= 1 << 1,
3035     REMOVE_GROUP= 1 << 2
3036   };
3037 
3038   uint changelog= 0;
3039 
3040   if (order_list.elements)
3041   {
3042     changelog|= REMOVE_ORDER;
3043     empty_order_list(this);
3044   }
3045 
3046   if (is_distinct())
3047   {
3048     changelog|= REMOVE_DISTINCT;
3049     remove_base_options(SELECT_DISTINCT);
3050   }
3051 
3052   // Remove GROUP BY if there are no aggregate functions and no HAVING clause
3053 
3054   if (group_list.elements && !agg_func_used() && !having_cond())
3055   {
3056     changelog|= REMOVE_GROUP;
3057     for (ORDER *g= group_list.first; g != NULL; g= g->next)
3058     {
3059       if (*g->item == g->item_ptr)
3060         (*g->item)->walk(&Item::clean_up_after_removal, walk_subquery,
3061                          reinterpret_cast<uchar*>(this));
3062     }
3063     group_list.empty();
3064     while (hidden_group_field_count-- > 0)
3065     {
3066       all_fields.pop();
3067       ref_ptrs[all_fields.elements]= NULL;
3068     }
3069   }
3070 
3071   if (changelog)
3072   {
3073     Opt_trace_context * trace= &thd->opt_trace;
3074     if (unlikely(trace->is_started()))
3075     {
3076       Opt_trace_object trace_wrapper(trace);
3077       Opt_trace_array trace_changes(trace, "transformations_to_subquery");
3078       if (changelog & REMOVE_ORDER)
3079         trace_changes.add_alnum("removed_ordering");
3080       if (changelog & REMOVE_DISTINCT)
3081         trace_changes.add_alnum("removed_distinct");
3082       if (changelog & REMOVE_GROUP)
3083         trace_changes.add_alnum("removed_grouping");
3084     }
3085   }
3086 }
3087 
3088 
3089 /**
3090   Empty the ORDER list.
3091   Delete corresponding elements from all_fields and ref_ptrs too.
3092   If ORDER list contain any subqueries, delete them from the query block list.
3093 
3094   @param sl  Query block that possible subquery blocks in the ORDER BY clause
3095              are attached to (may be different from "this" when query block has
3096              been merged into an outer query block).
3097 */
3098 
empty_order_list(SELECT_LEX * sl)3099 void SELECT_LEX::empty_order_list(SELECT_LEX *sl)
3100 {
3101   for (ORDER *o= order_list.first; o != NULL; o= o->next)
3102   {
3103     /*
3104       Do not remove the order by expression if it has a view
3105       reference. There is possibility that this view reference could
3106       be used elsewhere in the query
3107     */
3108     if (*o->item == o->item_ptr &&
3109         (!o->item_ptr->has_subquery() ||
3110          !o->item_ptr->walk(&Item::is_direct_view_ref,
3111                             Item::WALK_SUBQUERY_PREFIX, NULL))) {
3112       (*o->item)->walk(&Item::clean_up_after_removal, walk_subquery,
3113                        pointer_cast<uchar *>(sl));
3114     }
3115   }
3116   order_list.empty();
3117   while (hidden_order_field_count-- > 0)
3118   {
3119     all_fields.pop();
3120     ref_ptrs[all_fields.elements]= NULL;
3121   }
3122 }
3123 
3124 
3125 /*****************************************************************************
3126   Group and order functions
3127 *****************************************************************************/
3128 
3129 /**
3130   Resolve an ORDER BY or GROUP BY column reference.
3131 
3132   Given a column reference (represented by 'order') from a GROUP BY or ORDER
3133   BY clause, find the actual column it represents. If the column being
3134   resolved is from the GROUP BY clause, the procedure searches the SELECT
3135   list 'fields' and the columns in the FROM list 'tables'. If 'order' is from
3136   the ORDER BY clause, only the SELECT list is being searched.
3137 
3138   If 'order' is resolved to an Item, then order->item is set to the found
3139   Item. If there is no item for the found column (that is, it was resolved
3140   into a table field), order->item is 'fixed' and is added to all_fields and
3141   ref_pointer_array.
3142 
3143   ref_pointer_array and all_fields are updated.
3144 
3145   @param[in] thd		     Pointer to current thread structure
3146   @param[in,out] ref_pointer_array  All select, group and order by fields
3147   @param[in] tables                 List of tables to search in (usually
3148     FROM clause)
3149   @param[in] order                  Column reference to be resolved
3150   @param[in] fields                 List of fields to search in (usually
3151     SELECT list)
3152   @param[in,out] all_fields         All select, group and order by fields
3153   @param[in] is_group_field         True if order is a GROUP field, false if
3154     ORDER by field
3155 
3156   @retval
3157     FALSE if OK
3158   @retval
3159     TRUE  if error occurred
3160 */
3161 
3162 static bool
find_order_in_list(THD * thd,Ref_ptr_array ref_pointer_array,TABLE_LIST * tables,ORDER * order,List<Item> & fields,List<Item> & all_fields,bool is_group_field)3163 find_order_in_list(THD *thd, Ref_ptr_array ref_pointer_array,
3164                    TABLE_LIST *tables, ORDER *order,
3165                    List<Item> &fields, List<Item> &all_fields,
3166                    bool is_group_field)
3167 {
3168   Item *order_item= *order->item; /* The item from the GROUP/ORDER caluse. */
3169   Item::Type order_item_type;
3170   Item **select_item; /* The corresponding item from the SELECT clause. */
3171   Field *from_field;  /* The corresponding field from the FROM clause. */
3172   uint counter;
3173   enum_resolution_type resolution;
3174 
3175   /*
3176     Local SP variables may be int but are expressions, not positions.
3177     (And they can't be used before fix_fields is called for them).
3178   */
3179   if (order_item->type() == Item::INT_ITEM && order_item->basic_const_item())
3180   {						/* Order by position */
3181     uint count= (uint) order_item->val_int();
3182     if (!count || count > fields.elements)
3183     {
3184       my_error(ER_BAD_FIELD_ERROR, MYF(0),
3185                order_item->full_name(), thd->where);
3186       return TRUE;
3187     }
3188     order->item= &ref_pointer_array[count - 1];
3189     order->in_field_list= 1;
3190     order->is_position= true;
3191     return FALSE;
3192   }
3193   /* Lookup the current GROUP/ORDER field in the SELECT clause. */
3194   select_item= find_item_in_list(order_item, fields, &counter,
3195                                  REPORT_EXCEPT_NOT_FOUND, &resolution);
3196   if (!select_item)
3197     return TRUE; /* The item is not unique, or some other error occured. */
3198 
3199 
3200   /* Check whether the resolved field is not ambiguos. */
3201   if (select_item != not_found_item)
3202   {
3203     Item *view_ref= NULL;
3204     /*
3205       If we have found field not by its alias in select list but by its
3206       original field name, we should additionally check if we have conflict
3207       for this name (in case if we would perform lookup in all tables).
3208     */
3209     if (resolution == RESOLVED_BEHIND_ALIAS && !order_item->fixed &&
3210         order_item->fix_fields(thd, order->item))
3211       return TRUE;
3212 
3213     /* Lookup the current GROUP field in the FROM clause. */
3214     order_item_type= order_item->type();
3215     from_field= not_found_field;
3216     if ((is_group_field &&
3217         order_item_type == Item::FIELD_ITEM) ||
3218         order_item_type == Item::REF_ITEM)
3219     {
3220       from_field= find_field_in_tables(thd, (Item_ident*) order_item, tables,
3221                                        NULL, &view_ref, IGNORE_ERRORS, TRUE,
3222                                        FALSE);
3223       if (thd->is_error())
3224         return true;
3225 
3226       if (!from_field)
3227         from_field= not_found_field;
3228     }
3229 
3230     if (from_field == not_found_field ||
3231         (from_field != view_ref_found ?
3232          /* it is field of base table => check that fields are same */
3233          ((*select_item)->type() == Item::FIELD_ITEM &&
3234           ((Item_field*) (*select_item))->field->eq(from_field)) :
3235          /*
3236            in is field of view table => check that references on translation
3237            table are same
3238          */
3239          ((*select_item)->type() == Item::REF_ITEM &&
3240           view_ref->type() == Item::REF_ITEM &&
3241           ((Item_ref *) (*select_item))->ref ==
3242           ((Item_ref *) view_ref)->ref)))
3243     {
3244       /*
3245         If there is no such field in the FROM clause, or it is the same field
3246         as the one found in the SELECT clause, then use the Item created for
3247         the SELECT field. As a result if there was a derived field that
3248         'shadowed' a table field with the same name, the table field will be
3249         chosen over the derived field.
3250 
3251         If we replace *order->item with one from the select list or
3252         from a table in the FROM list, we should clean up after
3253         removing the old *order->item from the query. The item has not
3254         been fixed (so there are no aggregation functions that need
3255         cleaning up), but it may contain subqueries that should be
3256         unlinked.
3257       */
3258       if (*order->item != *select_item)
3259         (*order->item)->walk(&Item::clean_up_after_removal, walk_subquery,
3260                              NULL);
3261       order->item= &ref_pointer_array[counter];
3262       order->in_field_list=1;
3263       if (resolution == RESOLVED_AGAINST_ALIAS)
3264         order->used_alias= true;
3265       return FALSE;
3266     }
3267     else
3268     {
3269       /*
3270         There is a field with the same name in the FROM clause. This
3271         is the field that will be chosen. In this case we issue a
3272         warning so the user knows that the field from the FROM clause
3273         overshadows the column reference from the SELECT list.
3274       */
3275       push_warning_printf(thd, Sql_condition::SL_WARNING, ER_NON_UNIQ_ERROR,
3276                           ER(ER_NON_UNIQ_ERROR),
3277                           ((Item_ident*) order_item)->field_name,
3278                           current_thd->where);
3279     }
3280   }
3281 
3282   order->in_field_list=0;
3283   /*
3284     The call to order_item->fix_fields() means that here we resolve
3285     'order_item' to a column from a table in the list 'tables', or to
3286     a column in some outer query. Exactly because of the second case
3287     we come to this point even if (select_item == not_found_item),
3288     inspite of that fix_fields() calls find_item_in_list() one more
3289     time.
3290 
3291     We check order_item->fixed because Item_func_group_concat can put
3292     arguments for which fix_fields already was called.
3293 
3294     group_fix_field= TRUE is to resolve aliases from the SELECT list
3295     without creating of Item_ref-s: JOIN::exec() wraps aliased items
3296     in SELECT list with Item_copy items. To re-evaluate such a tree
3297     that includes Item_copy items we have to refresh Item_copy caches,
3298     but:
3299       - filesort() never refresh Item_copy items,
3300       - end_send_group() checks every record for group boundary by the
3301         test_if_group_changed function that obtain data from these
3302         Item_copy items, but the copy_fields function that
3303         refreshes Item copy items is called after group boundaries only -
3304         that is a vicious circle.
3305     So we prevent inclusion of Item_copy items.
3306   */
3307   bool save_group_fix_field= thd->lex->current_select()->group_fix_field;
3308   if (is_group_field)
3309     thd->lex->current_select()->group_fix_field= TRUE;
3310   bool ret= (!order_item->fixed &&
3311       (order_item->fix_fields(thd, order->item) ||
3312        (order_item= *order->item)->check_cols(1)));
3313   thd->lex->current_select()->group_fix_field= save_group_fix_field;
3314   if (ret)
3315     return TRUE; /* Wrong field. */
3316 
3317   uint el= all_fields.elements;
3318   all_fields.push_front(order_item); /* Add new field to field list. */
3319   ref_pointer_array[el]= order_item;
3320   /*
3321     If the order_item is a SUM_FUNC_ITEM, when fix_fields is called
3322     ref_by is set to order->item which is the address of order_item.
3323     But this needs to be address of order_item in the all_fields list.
3324     As a result, when it gets replaced with Item_aggregate_ref
3325     object in Item::split_sum_func2, we will be able to retrieve the
3326     newly created object.
3327   */
3328   if (order_item->type() == Item::SUM_FUNC_ITEM)
3329     ((Item_sum *)order_item)->ref_by= all_fields.head_ref();
3330 
3331   /*
3332     Currently, we assume that this assertion holds. If it turns out
3333     that it fails for some query, order->item has changed and the old
3334     item is removed from the query. In that case, we must call walk()
3335     with clean_up_after_removal() on the old order->item.
3336   */
3337   assert(order_item == *order->item);
3338   order->item= &ref_pointer_array[el];
3339   return FALSE;
3340 }
3341 
3342 
3343 /**
3344   Resolve and setup list of expressions in ORDER BY clause.
3345 
3346   Change order to point at item in select list.
3347   If item isn't a number and doesn't exists in the select list, add it to the
3348   the field list.
3349 
3350   @param thd  Thread handler
3351 
3352   @returns false if success, true if error
3353 */
3354 
setup_order(THD * thd,Ref_ptr_array ref_pointer_array,TABLE_LIST * tables,List<Item> & fields,List<Item> & all_fields,ORDER * order)3355 bool setup_order(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables,
3356                  List<Item> &fields, List<Item> &all_fields, ORDER *order)
3357 {
3358   assert(order);
3359 
3360   SELECT_LEX *const select= thd->lex->current_select();
3361 
3362   thd->where="order clause";
3363 
3364   const bool for_union= select->master_unit()->is_union() &&
3365                         select == select->master_unit()->fake_select_lex;
3366   const bool is_aggregated= select->is_grouped();
3367 
3368   for (uint number= 1; order; order=order->next, number++)
3369   {
3370     if (find_order_in_list(thd, ref_pointer_array, tables, order, fields,
3371 			   all_fields, false))
3372       return true;
3373     if ((*order->item)->with_sum_func)
3374     {
3375       /*
3376         Aggregated expressions in ORDER BY are not supported by SQL standard,
3377         but MySQL has some limited support for them. The limitations are
3378         checked below:
3379 
3380         1. A UNION query is not aggregated, so ordering by a set function
3381            is always wrong.
3382       */
3383       if (for_union)
3384       {
3385         my_error(ER_AGGREGATE_ORDER_FOR_UNION, MYF(0), number);
3386         return true;
3387       }
3388 
3389       /*
3390         2. A non-aggregated query combined with a set function in ORDER BY
3391            that does not contain an outer reference is illegal, because it
3392            would cause the query to become aggregated.
3393            (Since is_aggregated is false, this expression would cause
3394             agg_func_used() to become true).
3395       */
3396       if (!is_aggregated && select->agg_func_used())
3397       {
3398         my_error(ER_AGGREGATE_ORDER_NON_AGG_QUERY, MYF(0), number);
3399         return true;
3400       }
3401     }
3402   }
3403   return false;
3404 }
3405 
3406 
3407 /**
3408    Runs checks mandated by ONLY_FULL_GROUP_BY
3409 
3410    @param  thd                     THD pointer
3411 
3412    @returns true if ONLY_FULL_GROUP_BY is violated.
3413 */
3414 
check_only_full_group_by(THD * thd)3415 bool SELECT_LEX::check_only_full_group_by(THD *thd)
3416 {
3417   bool rc= false;
3418 
3419   if (is_grouped())
3420   {
3421     MEM_ROOT root;
3422     /*
3423       "root" has very short lifetime, and should not consume much
3424       => not instrumented.
3425     */
3426     init_sql_alloc(PSI_NOT_INSTRUMENTED, &root, MEM_ROOT_BLOCK_SIZE, 0);
3427     {
3428       Group_check gc(this, &root);
3429       rc= gc.check_query(thd);
3430       gc.to_opt_trace(thd);
3431     } // scope, to let any destructor run before free_root().
3432     free_root(&root, MYF(0));
3433   }
3434 
3435   if (!rc && is_distinct())
3436   {
3437     Distinct_check dc(this);
3438     rc= dc.check_query(thd);
3439   }
3440 
3441   return rc;
3442 }
3443 
3444 
3445 /**
3446   Do final setup of ORDER BY clause, after the query block is fully resolved.
3447 
3448   Check that ORDER BY clause is not redundant.
3449   Split any aggregate functions.
3450 
3451   @param thd                      Thread handler
3452 
3453   @returns false if success, true if error
3454 */
setup_order_final(THD * thd)3455 bool SELECT_LEX::setup_order_final(THD *thd)
3456 {
3457   if (is_implicitly_grouped())
3458   {
3459     // Result will contain zero or one row - ordering is redundant
3460     empty_order_list(this);
3461     return false;
3462   }
3463 
3464   if ((master_unit()->is_union() || master_unit()->fake_select_lex) &&
3465       this != master_unit()->fake_select_lex &&
3466       !(braces && explicit_limit))
3467   {
3468     // Part of UNION which requires global ordering may skip local order
3469     empty_order_list(this);
3470     return false;
3471   }
3472 
3473   for (ORDER *ord= order_list.first; ord; ord= ord->next)
3474   {
3475     Item *const item= *ord->item;
3476 
3477     if (item->with_sum_func && item->type() != Item::SUM_FUNC_ITEM)
3478     {
3479       item->split_sum_func(thd, ref_ptrs, all_fields);
3480       if (thd->is_error())
3481         return true;       /* purecov: inspected */
3482     }
3483   }
3484   return false;
3485 }
3486 
3487 
3488 /**
3489   Resolve and set up the GROUP BY list.
3490 
3491   @param thd			Thread handler
3492 
3493   @todo
3494     change ER_WRONG_FIELD_WITH_GROUP to more detailed
3495     ER_NON_GROUPING_FIELD_USED
3496 
3497   @returns false if success, true if error
3498 */
3499 
setup_group(THD * thd)3500 bool SELECT_LEX::setup_group(THD *thd)
3501 {
3502   assert(group_list.elements);
3503 
3504   thd->where="group statement";
3505   for (ORDER *group= group_list.first; group; group= group->next)
3506   {
3507     if (find_order_in_list(thd, ref_ptrs, get_table_list(), group, fields_list,
3508 			   all_fields, true))
3509       return true;
3510     if ((*group->item)->with_sum_func)
3511     {
3512       my_error(ER_WRONG_GROUP_FIELD, MYF(0), (*group->item)->full_name());
3513       return true;
3514     }
3515   }
3516   return false;
3517 }
3518 
3519 
3520 /****************************************************************************
3521   ROLLUP handling
3522 ****************************************************************************/
3523 
3524 /**
3525   Replace occurrences of group by fields in an expression by ref items.
3526 
3527   The function replaces occurrences of group by fields in expr
3528   by ref objects for these fields unless they are under aggregate
3529   functions.
3530   The function also corrects value of the the maybe_null attribute
3531   for the items of all subexpressions containing group by fields.
3532 
3533   @b EXAMPLES
3534     @code
3535       SELECT a+1 FROM t1 GROUP BY a WITH ROLLUP
3536       SELECT SUM(a)+a FROM t1 GROUP BY a WITH ROLLUP
3537   @endcode
3538 
3539   @b IMPLEMENTATION
3540 
3541     The function recursively traverses the tree of the expr expression,
3542     looks for occurrences of the group by fields that are not under
3543     aggregate functions and replaces them for the corresponding ref items.
3544 
3545   @note
3546     This substitution is needed GROUP BY queries with ROLLUP if
3547     SELECT list contains expressions over group by attributes.
3548 
3549   @param thd                  reference to the context
3550   @param expr                 expression to make replacement
3551   @param changed[out]  returns true if item contains a replaced field item
3552 
3553   @todo
3554     - TODO: Some functions are not null-preserving. For those functions
3555     updating of the maybe_null attribute is an overkill.
3556 
3557   @returns false if success, true if error
3558 
3559 */
3560 
change_group_ref(THD * thd,Item_func * expr,bool * changed)3561 bool SELECT_LEX::change_group_ref(THD *thd, Item_func *expr, bool *changed)
3562 {
3563   bool arg_changed= false;
3564   for (uint i= 0; i < expr->arg_count; i++)
3565   {
3566     Item **arg= expr->arguments() + i;
3567     Item *const item= *arg;
3568     if (item->type() == Item::FIELD_ITEM || item->type() == Item::REF_ITEM)
3569     {
3570       for (ORDER *group= group_list.first; group; group= group->next)
3571       {
3572         if (item->eq(*group->item, 0))
3573         {
3574           Item *new_item;
3575           if (!(new_item= new Item_ref(&context, group->item, 0,
3576                                        item->item_name.ptr())))
3577             return true;              /* purecov: inspected */
3578 
3579           expr->replace_argument(thd, arg, new_item);
3580           arg_changed= true;
3581         }
3582       }
3583     }
3584     else if (item->type() == Item::FUNC_ITEM)
3585     {
3586       if (change_group_ref(thd, (Item_func *) item, &arg_changed))
3587         return true;
3588     }
3589   }
3590   if (arg_changed)
3591   {
3592     expr->maybe_null= true;
3593     *changed= true;
3594   }
3595   return false;
3596 }
3597 
3598 
3599 /**
3600   Resolve items for rollup processing
3601 
3602   @param   thd   Thread handler
3603 
3604   @returns false if success, true if error
3605 */
3606 
resolve_rollup(THD * thd)3607 bool SELECT_LEX::resolve_rollup(THD *thd)
3608 {
3609   List_iterator<Item> it(all_fields);
3610   Item *item;
3611   while ((item= it++))
3612   {
3613     bool found_in_group= false;
3614 
3615     for (ORDER *group= group_list.first; group; group= group->next)
3616     {
3617       if (*group->item == item)
3618       {
3619         item->maybe_null= true;
3620         found_in_group= true;
3621         break;
3622       }
3623     }
3624     if (item->type() == Item::FUNC_ITEM && !found_in_group)
3625     {
3626       bool changed= false;
3627       if (change_group_ref(thd, (Item_func *) item, &changed))
3628         return true;       /* purecov: inspected */
3629       /*
3630         We have to prevent creation of a field in a temporary table for
3631         an expression that contains GROUP BY attributes.
3632         Marking the expression item as 'with_sum_func' will ensure this.
3633       */
3634       if (changed)
3635         item->with_sum_func= true;
3636     }
3637   }
3638   return false;
3639 }
3640 
3641 /**
3642   @brief  validate_gc_assignment
3643   Check whether the other values except DEFAULT are assigned
3644   for generated columns.
3645 
3646   @param thd                        thread handler
3647   @param fields                     Item_fields list to be filled
3648   @param values                     values to fill with
3649   @param table                      table to be checked
3650   @return Operation status
3651     @retval false   OK
3652     @retval true    Error occured
3653 
3654   @Note: This function must be called after table->write_set has been
3655          filled.
3656 */
3657 bool
validate_gc_assignment(THD * thd,List<Item> * fields,List<Item> * values,TABLE * table)3658 validate_gc_assignment(THD * thd, List<Item> *fields,
3659                        List<Item> *values, TABLE *table)
3660 {
3661   Field **fld= NULL;
3662   MY_BITMAP *bitmap= table->write_set;
3663   bool use_table_field= false;
3664   DBUG_ENTER("validate_gc_assignment");
3665 
3666   if (!values || (values->elements == 0))
3667       DBUG_RETURN(false);
3668 
3669   // If fields has no elements, we use all table fields
3670   if (fields->elements == 0)
3671   {
3672     use_table_field= true;
3673     fld= table->field;
3674   }
3675   List_iterator_fast<Item> f(*fields),v(*values);
3676   Item *value;
3677   while ((value= v++))
3678   {
3679     Field *rfield;
3680 
3681     if (!use_table_field)
3682       rfield= (down_cast<Item_field*>((f++)->real_item()))->field;
3683     else
3684       rfield= *(fld++);
3685     if (rfield->table != table)
3686       continue;
3687     /* skip non marked fields */
3688     if (!bitmap_is_set(bitmap, rfield->field_index))
3689       continue;
3690     if (rfield->gcol_info &&
3691         value->type() != Item::DEFAULT_VALUE_ITEM)
3692     {
3693       my_error(ER_NON_DEFAULT_VALUE_FOR_GENERATED_COLUMN, MYF(0),
3694                rfield->field_name, rfield->table->s->table_name.str);
3695       DBUG_RETURN(true);
3696     }
3697   }
3698   DBUG_RETURN(false);
3699 }
3700 
3701 
3702 /**
3703   Delete unused columns from merged tables.
3704 
3705   This function is called recursively for each join nest and/or table
3706   in the query block. For each merged table that it finds, each column
3707   that contains a subquery and is not marked as used is removed and
3708   the translation item is set to NULL.
3709 
3710   @param tables List of tables and join nests
3711 */
3712 
delete_unused_merged_columns(List<TABLE_LIST> * tables)3713 void SELECT_LEX::delete_unused_merged_columns(List<TABLE_LIST> *tables)
3714 {
3715   DBUG_ENTER("delete_unused_merged_columns");
3716 
3717   TABLE_LIST *tl;
3718   List_iterator<TABLE_LIST> li(*tables);
3719   while ((tl= li++))
3720   {
3721     if (tl->nested_join == NULL)
3722       continue;
3723     if (tl->is_merged())
3724     {
3725       for (Field_translator *transl= tl->field_translation;
3726            transl < tl->field_translation_end;
3727            transl++)
3728       {
3729         Item *const item= transl->item;
3730 
3731         assert(item->fixed);
3732         if (!item->has_subquery())
3733           continue;
3734 
3735         /*
3736           All used columns selected from derived tables are already marked
3737           as such. But unmarked columns may still refer to other columns
3738           from underlying derived tables, and in that case we cannot
3739           delete these columns as they share the same items.
3740           Thus, dive into the expression and mark such columns as "used".
3741           (This is a bit incorrect, as only a part of its underlying expression
3742           is "used", but that has no practical meaning.)
3743         */
3744         if (!item->is_derived_used() &&
3745             item->walk(&Item::propagate_derived_used, Item::WALK_POSTFIX, NULL))
3746           item->walk(&Item::propagate_set_derived_used,
3747                      Item::WALK_SUBQUERY_POSTFIX, NULL);
3748 
3749         if (!item->is_derived_used())
3750         {
3751           item->walk(&Item::clean_up_after_removal, walk_subquery,
3752                      pointer_cast<uchar *>(this));
3753           transl->item= NULL;
3754         }
3755       }
3756     }
3757     delete_unused_merged_columns(&tl->nested_join->join_list);
3758   }
3759 
3760   DBUG_VOID_RETURN;
3761 }
3762 
3763 
3764 /**
3765   Add item to the hidden part of select list.
3766 
3767   @param item  item to add
3768 
3769   @return Pointer to ref_ptr for the added item
3770 */
3771 
add_hidden_item(Item * item)3772 Item **SELECT_LEX::add_hidden_item(Item *item)
3773 {
3774   uint el= all_fields.elements;
3775   ref_ptrs[el]= item;
3776   all_fields.push_front(item);
3777   return &ref_ptrs[el];
3778 }
3779 
3780 /**
3781   @} (end of group Query_Resolver)
3782 */
3783