1 /* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
22 
23 /**
24   @file
25 
26   @brief
27   Implementation of name resolution stage
28 
29 
30   @defgroup Query_Resolver  Query Resolver
31   @{
32 */
33 
34 #include "sql_select.h"
35 #include "sql_resolver.h"
36 #include "sql_optimizer.h"
37 #include "opt_trace.h"
38 #include "sql_base.h"
39 #include "sql_acl.h"
40 #include "opt_explain_format.h"
41 
42 static void remove_redundant_subquery_clauses(st_select_lex *subq_select_lex,
43                                               int hidden_group_field_count,
44                                               int hidden_order_field_count,
45                                               List<Item> &fields,
46                                               Ref_ptr_array ref_pointer_array);
47 static inline int
48 setup_without_group(THD *thd, Ref_ptr_array ref_pointer_array,
49                     TABLE_LIST *tables,
50                     TABLE_LIST *leaves,
51                     List<Item> &fields,
52                     List<Item> &all_fields,
53                     Item **conds,
54                     ORDER *order,
55                     ORDER *group,
56                     int *hidden_group_field_count,
57                     int *hidden_order_field_count);
58 static bool resolve_subquery(THD *thd, JOIN *join);
59 static int
60 setup_group(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables,
61             List<Item> &fields, List<Item> &all_fields, ORDER *order);
62 static bool
63 match_exprs_for_only_full_group_by(THD *thd, List<Item> &all_fields,
64                                    int hidden_group_exprs_count,
65                                    int hidden_order_exprs_count,
66                                    int select_exprs_count,
67                                    ORDER *group_exprs);
68 
69 
70 /**
71   Prepare of whole select (including sub queries in future).
72 
73   @todo
74     Add check of calculation of GROUP functions and fields:
75     SELECT COUNT(*)+table.col1 from table1;
76 
77   @retval
78     -1   on error
79   @retval
80     0   on success
81 */
82 int
prepare(TABLE_LIST * tables_init,uint wild_num,Item * conds_init,uint og_num,ORDER * order_init,ORDER * group_init,Item * having_init,SELECT_LEX * select_lex_arg,SELECT_LEX_UNIT * unit_arg)83 JOIN::prepare(TABLE_LIST *tables_init,
84 	      uint wild_num, Item *conds_init, uint og_num,
85 	      ORDER *order_init, ORDER *group_init,
86 	      Item *having_init,
87 	      SELECT_LEX *select_lex_arg,
88 	      SELECT_LEX_UNIT *unit_arg)
89 {
90   DBUG_ENTER("JOIN::prepare");
91 
92   // to prevent double initialization on EXPLAIN
93   if (optimized)
94     DBUG_RETURN(0);
95 
96   // We may do subquery transformation, or Item substitution:
97   Prepare_error_tracker tracker(thd);
98 
99   if (order_init)
100     explain_flags.set(ESC_ORDER_BY, ESP_EXISTS);
101   if (group_init)
102     explain_flags.set(ESC_GROUP_BY, ESP_EXISTS);
103   if (select_options & SELECT_DISTINCT)
104     explain_flags.set(ESC_DISTINCT, ESP_EXISTS);
105 
106   conds= conds_init;
107   order= ORDER_with_src(order_init, ESC_ORDER_BY);
108   group_list= ORDER_with_src(group_init, ESC_GROUP_BY);
109   having= having_for_explain= having_init;
110   tables_list= tables_init;
111   DBUG_ASSERT(!unit_arg->cleaned);
112   select_lex= select_lex_arg;
113   select_lex->join= this;
114   join_list= &select_lex->top_join_list;
115   union_part= unit_arg->is_union();
116 
117   thd->lex->current_select->is_item_list_lookup= 1;
118   /*
119     If we have already executed SELECT, then it have not sense to prevent
120     its table from update (see unique_table())
121   */
122   if (thd->derived_tables_processing)
123     select_lex->exclude_from_table_unique_test= TRUE;
124 
125   Opt_trace_context * const trace= &thd->opt_trace;
126   Opt_trace_object trace_wrapper(trace);
127   Opt_trace_object trace_prepare(trace, "join_preparation");
128   trace_prepare.add_select_number(select_lex->select_number);
129   Opt_trace_array trace_steps(trace, "steps");
130 
131   /* Check that all tables, fields, conds and order are ok */
132 
133   if (!(select_options & OPTION_SETUP_TABLES_DONE) &&
134       setup_tables_and_check_access(thd, &select_lex->context, join_list,
135                                     tables_list, &select_lex->leaf_tables,
136                                     FALSE, SELECT_ACL, SELECT_ACL))
137       DBUG_RETURN(-1);
138 
139   select_lex->derived_table_count= 0;
140   select_lex->materialized_table_count= 0;
141   select_lex->partitioned_table_count= 0;
142 
143   TABLE_LIST *table_ptr;
144   for (table_ptr= select_lex->leaf_tables;
145        table_ptr;
146        table_ptr= table_ptr->next_leaf)
147   {
148     primary_tables++;           // Count the primary input tables of the query
149     if (table_ptr->derived)
150       select_lex->derived_table_count++;
151     if (table_ptr->uses_materialization())
152       select_lex->materialized_table_count++;
153 #ifdef WITH_PARTITION_STORAGE_ENGINE
154     if (table_ptr->table->part_info)
155       select_lex->partitioned_table_count++;
156 #endif
157   }
158   tables= primary_tables;       // This is currently the total number of tables
159 
160   /*
161     Item and Item_field CTORs will both increment some counters
162     in current_select, based on the current parsing context.
163     We are not parsing anymore: any new Items created now are due to
164     query rewriting, so stop incrementing counters.
165    */
166   DBUG_ASSERT(select_lex->parsing_place == NO_MATTER);
167   select_lex->parsing_place= NO_MATTER;
168 
169   if (setup_wild(thd, tables_list, fields_list, &all_fields, wild_num))
170     DBUG_RETURN(-1);
171   if (select_lex->setup_ref_array(thd, og_num))
172     DBUG_RETURN(-1);
173 
174   ref_ptrs= ref_ptr_array_slice(0);
175 
176   if (setup_fields(thd, ref_ptrs, fields_list, MARK_COLUMNS_READ,
177                    &all_fields, 1))
178     DBUG_RETURN(-1);
179 
180   int hidden_order_field_count;
181   if (setup_without_group(thd, ref_ptrs, tables_list,
182                           select_lex->leaf_tables, fields_list,
183                           all_fields, &conds, order, group_list,
184                           &hidden_group_field_count,
185                           &hidden_order_field_count))
186     DBUG_RETURN(-1);
187 
188   /*
189     Permanently remove redundant parts from the query if
190       1) This is a subquery
191       2) This is the first time this query is optimized (since the
192          transformation is permanent)
193       3) Not normalizing a view. Removal should take place when a
194          query involving a view is optimized, not when the view
195          is created
196   */
197   if (select_lex->master_unit()->item &&                               // 1)
198       select_lex->first_cond_optimization &&                           // 2)
199       !(thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW)) // 3)
200   {
201     remove_redundant_subquery_clauses(select_lex, hidden_group_field_count,
202                                       hidden_order_field_count,
203                                       all_fields, ref_ptrs);
204   }
205 
206   if (having)
207   {
208     nesting_map save_allow_sum_func= thd->lex->allow_sum_func;
209     thd->where="having clause";
210     thd->lex->allow_sum_func|= (nesting_map)1 << select_lex_arg->nest_level;
211     select_lex->having_fix_field= 1;
212     select_lex->resolve_place= st_select_lex::RESOLVE_HAVING;
213     bool having_fix_rc= (!having->fixed &&
214                          (having->fix_fields(thd, &having) ||
215                           having->check_cols(1)));
216     select_lex->having_fix_field= 0;
217     having_for_explain= select_lex->having= having;
218 
219     select_lex->resolve_place= st_select_lex::RESOLVE_NONE;
220     if (having_fix_rc || thd->is_error())
221       DBUG_RETURN(-1);				/* purecov: inspected */
222     thd->lex->allow_sum_func= save_allow_sum_func;
223   }
224 
225   /*
226     Printing the expanded query should happen here and not elsewhere, because
227     when a view is merged (when the view is opened in open_tables()), the
228     parent query's select_lex does not yet contain a correct WHERE clause (it
229     misses the view's merged WHERE clause). This is corrected only just above,
230     in TABLE_LIST::prep_where(), called by
231     setup_without_group()->setup_conds().
232     We also have to wait for fix_fields() on HAVING, above.
233     At this stage, we also have properly set up Item_ref-s.
234   */
235   {
236     Opt_trace_object trace_wrapper(trace);
237     opt_trace_print_expanded_query(thd, select_lex, &trace_wrapper);
238   }
239 
240   /*
241     When normalizing a view (like when writing a view's body to the FRM),
242     subquery transformations don't apply (if they did, IN->EXISTS could not be
243     undone in favour of materialization, when optimizing a later statement
244     using the view)
245   */
246   if (select_lex->master_unit()->item &&    // This is a subquery
247                                             // Not normalizing a view
248       !(thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW) &&
249       !(select_options & SELECT_DESCRIBE))  // Not within a describe
250   {
251     /* Join object is a subquery within an IN/ANY/ALL/EXISTS predicate */
252     if (resolve_subquery(thd, this))
253       DBUG_RETURN(-1);
254   }
255 
256   select_lex->fix_prepare_information(thd, &conds, &having);
257 
258   if (order)
259   {
260     bool real_order= FALSE;
261     ORDER *ord;
262     for (ord= order; ord; ord= ord->next)
263     {
264       Item *item= *ord->item;
265       /*
266         Disregard sort order if there's only
267         zero length NOT NULL fields (e.g. {VAR}CHAR(0) NOT NULL") or
268         zero length NOT NULL string functions there.
269         Such tuples don't contain any data to sort.
270       */
271       if (!real_order &&
272            /* Not a zero length NOT NULL field */
273           ((item->type() != Item::FIELD_ITEM ||
274             ((Item_field *) item)->field->maybe_null() ||
275             ((Item_field *) item)->field->sort_length()) &&
276            /* AND not a zero length NOT NULL string function. */
277            (item->type() != Item::FUNC_ITEM ||
278             item->maybe_null ||
279             item->result_type() != STRING_RESULT ||
280             item->max_length)))
281         real_order= TRUE;
282 
283       if (item->with_sum_func && item->type() != Item::SUM_FUNC_ITEM)
284         item->split_sum_func(thd, ref_ptrs, all_fields);
285     }
286     if (!real_order)
287       order= NULL;
288   }
289 
290   if (having && having->with_sum_func)
291     having->split_sum_func2(thd, ref_ptrs,
292                             all_fields, &having, TRUE);
293   if (select_lex->inner_sum_func_list)
294   {
295     Item_sum *end=select_lex->inner_sum_func_list;
296     Item_sum *item_sum= end;
297     do
298     {
299       item_sum= item_sum->next;
300       item_sum->split_sum_func2(thd, ref_ptrs,
301                                 all_fields, item_sum->ref_by, FALSE);
302     } while (item_sum != end);
303   }
304 
305   if (select_lex->inner_refs_list.elements &&
306       fix_inner_refs(thd, all_fields, select_lex, ref_ptrs,
307                      group_list))
308     DBUG_RETURN(-1);
309 
310   if (group_list)
311   {
312     /*
313       Because HEAP tables can't index BIT fields we need to use an
314       additional hidden field for grouping because later it will be
315       converted to a LONG field. Original field will remain of the
316       BIT type and will be returned to a client.
317     */
318     for (ORDER *ord= group_list; ord; ord= ord->next)
319     {
320       if ((*ord->item)->type() == Item::FIELD_ITEM &&
321           (*ord->item)->field_type() == MYSQL_TYPE_BIT)
322       {
323         Item_field *field= new Item_field(thd, *(Item_field**)ord->item);
324         int el= all_fields.elements;
325         ref_ptrs[el]= field;
326         all_fields.push_front(field);
327         ord->item= &ref_ptrs[el];
328       }
329     }
330   }
331 
332   if (setup_ftfuncs(select_lex)) /* should be after having->fix_fields */
333     DBUG_RETURN(-1);
334 
335 
336   /*
337     Check if there are references to un-aggregated columns when computing
338     aggregate functions with implicit grouping (there is no GROUP BY).
339   */
340   if (thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY && !group_list &&
341       select_lex->non_agg_field_used() &&
342       select_lex->agg_func_used())
343   {
344     my_message(ER_MIX_OF_GROUP_FUNC_AND_FIELDS,
345                ER(ER_MIX_OF_GROUP_FUNC_AND_FIELDS), MYF(0));
346     DBUG_RETURN(-1);
347   }
348   {
349     /* Caclulate the number of groups */
350     send_group_parts= 0;
351     for (ORDER *group_tmp= group_list ; group_tmp ; group_tmp= group_tmp->next)
352       send_group_parts++;
353   }
354 
355 
356   if (result && result->prepare(fields_list, unit_arg))
357     DBUG_RETURN(-1); /* purecov: inspected */
358 
359   /* Init join struct */
360   count_field_types(select_lex, &tmp_table_param, all_fields, 0);
361   this->group= group_list != 0;
362   unit= unit_arg;
363 
364   if (tmp_table_param.sum_func_count && !group_list)
365   {
366     implicit_grouping= TRUE;
367     // Result will contain zero or one row - ordering is meaningless
368     order= NULL;
369   }
370 
371 #ifdef RESTRICTED_GROUP
372   if (implicit_grouping)
373   {
374     my_message(ER_WRONG_SUM_SELECT,ER(ER_WRONG_SUM_SELECT),MYF(0));
375     DBUG_RETURN(-1); /* purecov: inspected */
376   }
377 #endif
378   if (select_lex->olap == ROLLUP_TYPE && rollup_init())
379     DBUG_RETURN(-1); /* purecov: inspected */
380   if (alloc_func_list())
381     DBUG_RETURN(-1); /* purecov: inspected */
382 
383 #ifdef WITH_PARTITION_STORAGE_ENGINE
384   if (select_lex->partitioned_table_count)
385   {
386     for (TABLE_LIST *tbl= select_lex->leaf_tables; tbl; tbl= tbl->next_leaf)
387     {
388       /*
389         This will only prune constant conditions, which will be used for
390         lock pruning.
391       */
392       if (prune_partitions(thd, tbl->table,
393                            tbl->join_cond() ? tbl->join_cond() : conds))
394         DBUG_RETURN(-1); /* purecov: inspected */
395     }
396   }
397 #endif
398 
399   DBUG_RETURN(0); // All OK
400 }
401 
402 
403 /**
404   Check if the subquery predicate can be executed via materialization.
405 
406   @param predicate IN subquery predicate
407   @param thd       THD
408   @param select_lex SELECT_LEX of the subquery
409   @param outer      Parent SELECT_LEX (outer to subquery)
410 
411   @return TRUE if subquery allows materialization, FALSE otherwise.
412 */
413 
subquery_allows_materialization(Item_in_subselect * predicate,THD * thd,SELECT_LEX * select_lex,const SELECT_LEX * outer)414 bool subquery_allows_materialization(Item_in_subselect *predicate,
415                                      THD *thd,
416                                      SELECT_LEX *select_lex,
417                                      const SELECT_LEX *outer)
418 {
419   bool has_nullables= false;
420   const uint elements= predicate->unit->first_select()->item_list.elements;
421   DBUG_ENTER("subquery_allows_materialization");
422   DBUG_ASSERT(elements >= 1);
423   DBUG_ASSERT(predicate->left_expr->cols() == elements);
424 
425   OPT_TRACE_TRANSFORM(&thd->opt_trace, trace_wrapper, trace_mat,
426                       select_lex->select_number,
427                       "IN (SELECT)", "materialization");
428 
429   const char *cause= NULL;
430   if (predicate->substype() != Item_subselect::IN_SUBS)
431   {
432     // Subq-mat cannot handle 'outer_expr > {ANY|ALL}(subq)'...
433     cause= "not an IN predicate";
434   }
435   else if (select_lex->is_part_of_union())
436   {
437     // Subquery must be a single query specification clause (not a UNION)
438     cause= "in UNION";
439   }
440   else if (!select_lex->master_unit()->first_select()->leaf_tables)
441   {
442     // Subquery has no tables, hence no point in materializing.
443     cause= "no inner tables";
444   }
445   else if (!outer->join)
446   {
447     /*
448       Maybe this is a subquery of a single table UPDATE/DELETE (TODO:
449       handle this by switching to multi-table UPDATE/DELETE).
450     */
451     cause= "parent query has no JOIN";
452   }
453   else if (!outer->leaf_tables)
454   {
455     // The upper query is SELECT ... FROM DUAL. No gain in materializing.
456     cause= "no tables in outer query";
457   }
458   else if (predicate->originally_dependent())
459   {
460     /*
461       Subquery should not be correlated; the correlation due to predicates
462       injected by IN->EXISTS does not count as we will remove them if we
463       choose materialization.
464 
465       TODO:
466       This is an overly restrictive condition. It can be extended to:
467          (Subquery is non-correlated ||
468           Subquery is correlated to any query outer to IN predicate ||
469           (Subquery is correlated to the immediate outer query &&
470            Subquery !contains {GROUP BY, ORDER BY [LIMIT],
471            aggregate functions}) && subquery predicate is not under "NOT IN"))
472     */
473     cause= "correlated";
474   }
475   else
476   {
477     /*
478       Check that involved expression types allow materialization.
479       This is a temporary fix for BUG#36752; see bug report for
480       description of restrictions we need to put on the compared expressions.
481     */
482     DBUG_ASSERT(predicate->left_expr->fixed);
483     List_iterator<Item> it(predicate->unit->first_select()->item_list);
484 
485     for (uint i= 0; i < elements; i++)
486     {
487       Item * const inner= it++;
488       Item * const outer= predicate->left_expr->element_index(i);
489       if (!types_allow_materialization(outer, inner))
490       {
491         cause= "type mismatch";
492         break;
493       }
494       if (inner->is_blob_field())                 // 6
495       {
496         cause= "inner blob";
497         break;
498       }
499       has_nullables|= outer->maybe_null | inner->maybe_null;
500     }
501 
502     if (!cause)
503     {
504       trace_mat.add("has_nullable_expressions", has_nullables);
505       /*
506         Subquery materialization cannot handle NULLs partial matching
507         properly, yet. If the outer or inner values are NULL, the
508         subselect_hash_sj_engine may reply FALSE when it should reply UNKNOWN.
509         So, we must limit it to those three cases:
510         - when FALSE and UNKNOWN are equivalent answers. I.e. this is a a
511         top-level predicate (this implies it is not negated).
512         - when outer and inner values cannot be NULL.
513         - when there is a single inner column (because for this we have a
514         limited implementation of NULLs partial matching).
515       */
516       const bool is_top_level= predicate->is_top_level_item();
517       trace_mat.add("treat_UNKNOWN_as_FALSE", is_top_level);
518 
519       if (!is_top_level && has_nullables && (elements > 1))
520         cause= "cannot_handle_partial_matches";
521       else
522       {
523         trace_mat.add("possible", true);
524         DBUG_RETURN(TRUE);
525       }
526     }
527   }
528   DBUG_ASSERT(cause != NULL);
529   trace_mat.add("possible", false).add_alnum("cause", cause);
530   DBUG_RETURN(false);
531 }
532 
533 
534 /**
535   @brief Resolve predicate involving subquery
536 
537   @param thd     Pointer to THD.
538   @param join    Join that is part of a subquery predicate.
539 
540   @retval FALSE  Success.
541   @retval TRUE   Error.
542 
543   @details
544   Perform early unconditional subquery transformations:
545    - Convert subquery predicate into semi-join, or
546    - Mark the subquery for execution using materialization, or
547    - Perform IN->EXISTS transformation, or
548    - Perform more/less ALL/ANY -> MIN/MAX rewrite
549    - Substitute trivial scalar-context subquery with its value
550 
551   @todo for PS, make the whole block execute only on the first execution
552 
553 */
554 
resolve_subquery(THD * thd,JOIN * join)555 static bool resolve_subquery(THD *thd, JOIN *join)
556 {
557   DBUG_ENTER("resolve_subquery");
558 
559   bool chose_semijoin= false;
560   SELECT_LEX *const select_lex= join->select_lex;
561   SELECT_LEX *const outer= select_lex->outer_select();
562 
563   /*
564     @todo for PS, make the whole block execute only on the first execution.
565     resolve_subquery() is only invoked in the first execution for subqueries
566     that are transformed to semijoin, but for other subqueries, this function
567     is called for every execution. One solution is perhaps to define
568     exec_method in class Item_subselect and exit immediately if unequal to
569     EXEC_UNSPECIFIED.
570   */
571   Item_subselect *subq_predicate= select_lex->master_unit()->item;
572   DBUG_ASSERT(subq_predicate);
573   /**
574     @note
575     In this case: IN (SELECT ... UNION SELECT ...), JOIN::prepare() is
576     called for each of the two UNION members, and in those two calls,
577     subq_predicate is the same, not sure this is desired (double work?).
578   */
579 
580   Item_in_subselect * const in_predicate=
581     (subq_predicate->substype() == Item_subselect::IN_SUBS) ?
582     static_cast<Item_in_subselect *>(subq_predicate) : NULL;
583 
584   if (in_predicate)
585   {
586     DBUG_ASSERT(select_lex == thd->lex->current_select);
587     thd->lex->current_select= outer;
588     char const *save_where= thd->where;
589     thd->where= "IN/ALL/ANY subquery";
590 
591     bool result= !in_predicate->left_expr->fixed &&
592                   in_predicate->left_expr->fix_fields(thd,
593                                                      &in_predicate->left_expr);
594     thd->lex->current_select= select_lex;
595     thd->where= save_where;
596     if (result)
597       DBUG_RETURN(TRUE); /* purecov: deadcode */
598 
599     /*
600       Check if the left and right expressions have the same # of
601       columns, i.e. we don't have a case like
602         (oe1, oe2) IN (SELECT ie1, ie2, ie3 ...)
603 
604       TODO why do we have this duplicated in IN->EXISTS transformers?
605       psergey-todo: fix these: grep for duplicated_subselect_card_check
606     */
607     if (select_lex->item_list.elements != in_predicate->left_expr->cols())
608     {
609       my_error(ER_OPERAND_COLUMNS, MYF(0), in_predicate->left_expr->cols());
610       DBUG_RETURN(TRUE);
611     }
612 
613   }
614 
615   DBUG_PRINT("info", ("Checking if subq can be converted to semi-join"));
616   /*
617     Check if we're in subquery that is a candidate for flattening into a
618     semi-join (which is done in flatten_subqueries()). The requirements are:
619       1. Subquery predicate is an IN/=ANY subquery predicate
620       2. Subquery is a single SELECT (not a UNION)
621       3. Subquery does not have GROUP BY
622       4. Subquery does not use aggregate functions or HAVING
623       5. Subquery predicate is at the AND-top-level of ON/WHERE clause
624       6. We are not in a subquery of a single table UPDATE/DELETE that
625            doesn't have a JOIN (TODO: We should handle this at some
626            point by switching to multi-table UPDATE/DELETE)
627       7. We're not in a confluent table-less subquery, like "SELECT 1".
628       8. No execution method was already chosen (by a prepared statement)
629       9. Parent select is not a confluent table-less select
630       10. Neither parent nor child select have STRAIGHT_JOIN option.
631   */
632   if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_SEMIJOIN) &&
633       in_predicate &&                                                   // 1
634       !select_lex->is_part_of_union() &&                                // 2
635       !select_lex->group_list.elements &&                               // 3
636       !join->having && !select_lex->with_sum_func &&                    // 4
637       (outer->resolve_place == st_select_lex::RESOLVE_CONDITION ||      // 5
638        outer->resolve_place == st_select_lex::RESOLVE_JOIN_NEST) &&     // 5
639       outer->join &&                                                    // 6
640       select_lex->master_unit()->first_select()->leaf_tables &&         // 7
641       in_predicate->exec_method ==
642                            Item_exists_subselect::EXEC_UNSPECIFIED &&   // 8
643       outer->leaf_tables &&                                             // 9
644       !((join->select_options | outer->join->select_options)
645         & SELECT_STRAIGHT_JOIN))                                        // 10
646   {
647     DBUG_PRINT("info", ("Subquery is semi-join conversion candidate"));
648 
649     /* Notify in the subquery predicate where it belongs in the query graph */
650     in_predicate->embedding_join_nest= outer->resolve_nest;
651 
652     /* Register the subquery for further processing in flatten_subqueries() */
653     outer->join->sj_subselects.push_back(in_predicate);
654     chose_semijoin= true;
655   }
656 
657   if (in_predicate)
658   {
659     Opt_trace_context * const trace= &join->thd->opt_trace;
660     OPT_TRACE_TRANSFORM(trace, oto0, oto1,
661                         select_lex->select_number, "IN (SELECT)", "semijoin");
662     oto1.add("chosen", chose_semijoin);
663   }
664 
665   if (!chose_semijoin &&
666       subq_predicate->select_transformer(join) == Item_subselect::RES_ERROR)
667     DBUG_RETURN(TRUE);
668 
669   DBUG_RETURN(FALSE);
670 }
671 
672 
673 /**
674   Fix fields referenced from inner selects.
675 
676   @param thd               Thread handle
677   @param all_fields        List of all fields used in select
678   @param select            Current select
679   @param ref_pointer_array Array of references to Items used in current select
680   @param group_list        GROUP BY list (is NULL by default)
681 
682   @details
683     The function serves 3 purposes
684 
685     - adds fields referenced from inner query blocks to the current select list
686 
687     - Decides which class to use to reference the items (Item_ref or
688       Item_direct_ref)
689 
690     - fixes references (Item_ref objects) to these fields.
691 
692     If a field isn't already on the select list and the ref_pointer_array
693     is provided then it is added to the all_fields list and the pointer to
694     it is saved in the ref_pointer_array.
695 
696     The class to access the outer field is determined by the following rules:
697 
698     -#. If the outer field isn't used under an aggregate function then the
699         Item_ref class should be used.
700 
701     -#. If the outer field is used under an aggregate function and this
702         function is, in turn, aggregated in the query block where the outer
703         field was resolved or some query nested therein, then the
704         Item_direct_ref class should be used. Also it should be used if we are
705         grouping by a subquery containing the outer field.
706 
707     The resolution is done here and not at the fix_fields() stage as
708     it can be done only after aggregate functions are fixed and pulled up to
709     selects where they are to be aggregated.
710 
711     When the class is chosen it substitutes the original field in the
712     Item_outer_ref object.
713 
714     After this we proceed with fixing references (Item_outer_ref objects) to
715     this field from inner subqueries.
716 
717   @return Status
718   @retval true An error occured.
719   @retval false OK.
720  */
721 
722 bool
fix_inner_refs(THD * thd,List<Item> & all_fields,SELECT_LEX * select,Ref_ptr_array ref_pointer_array,ORDER * group_list)723 fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select,
724                Ref_ptr_array ref_pointer_array, ORDER *group_list)
725 {
726   Item_outer_ref *ref;
727 
728   List_iterator<Item_outer_ref> ref_it(select->inner_refs_list);
729   while ((ref= ref_it++))
730   {
731     bool direct_ref= false;
732     Item *item= ref->outer_ref;
733     Item **item_ref= ref->ref;
734     Item_ref *new_ref;
735     /*
736       TODO: this field item already might be present in the select list.
737       In this case instead of adding new field item we could use an
738       existing one. The change will lead to less operations for copying fields,
739       smaller temporary tables and less data passed through filesort.
740     */
741     if (!ref_pointer_array.is_null() && !ref->found_in_select_list)
742     {
743       int el= all_fields.elements;
744       ref_pointer_array[el]= item;
745       /* Add the field item to the select list of the current select. */
746       all_fields.push_front(item);
747       /*
748         If it's needed reset each Item_ref item that refers this field with
749         a new reference taken from ref_pointer_array.
750       */
751       item_ref= &ref_pointer_array[el];
752     }
753 
754     if (ref->in_sum_func)
755     {
756       Item_sum *sum_func;
757       if (ref->in_sum_func->nest_level > select->nest_level)
758         direct_ref= TRUE;
759       else
760       {
761         for (sum_func= ref->in_sum_func; sum_func &&
762              sum_func->aggr_level >= select->nest_level;
763              sum_func= sum_func->in_sum_func)
764         {
765           if (sum_func->aggr_level == select->nest_level)
766           {
767             direct_ref= TRUE;
768             break;
769           }
770         }
771       }
772     }
773     else
774     {
775       /*
776         Check if GROUP BY item trees contain the outer ref:
777         in this case we have to use Item_direct_ref instead of Item_ref.
778       */
779       for (ORDER *group= group_list; group; group= group->next)
780       {
781         if ((*group->item)->walk(&Item::find_item_processor, TRUE,
782                                  (uchar *) ref))
783         {
784           direct_ref= TRUE;
785           break;
786         }
787       }
788     }
789     new_ref= direct_ref ?
790               new Item_direct_ref(ref->context, item_ref, ref->table_name,
791                           ref->field_name, ref->alias_name_used) :
792               new Item_ref(ref->context, item_ref, ref->table_name,
793                           ref->field_name, ref->alias_name_used);
794     if (!new_ref)
795       return TRUE;
796     ref->outer_ref= new_ref;
797     ref->ref= &ref->outer_ref;
798 
799     if (!ref->fixed && ref->fix_fields(thd, 0))
800       return TRUE;
801     thd->lex->used_tables|= item->used_tables();
802     thd->lex->current_select->select_list_tables|= item->used_tables();
803   }
804   return false;
805 }
806 
807 
808 /**
809    Since LIMIT is not supported for table subquery predicates
810    (IN/ALL/EXISTS/etc), the following clauses are redundant for
811    subqueries:
812 
813    ORDER BY
814    DISTINCT
815    GROUP BY   if there are no aggregate functions and no HAVING
816               clause
817 
818    Because redundant clauses are removed both from JOIN and
819    select_lex, the removal is permanent. Thus, it only makes sense to
820    call this function for normal queries and on first execution of
821    SP/PS
822 
823    @param subq_select_lex   select_lex that is part of a subquery
824                             predicate. This object and the associated
825                             join is modified.
826    @param hidden_group_field_count Number of hidden group fields added
827                             by setup_group().
828    @param hidden_order_field_count Number of hidden order fields added
829                             by setup_order().
830    @param fields            Fields list from which to remove items.
831    @param ref_pointer_array Pointers to top level of all_fields.
832 */
833 
834 static
remove_redundant_subquery_clauses(st_select_lex * subq_select_lex,int hidden_group_field_count,int hidden_order_field_count,List<Item> & fields,Ref_ptr_array ref_pointer_array)835 void remove_redundant_subquery_clauses(st_select_lex *subq_select_lex,
836                                        int hidden_group_field_count,
837                                        int hidden_order_field_count,
838                                        List<Item> &fields,
839                                        Ref_ptr_array ref_pointer_array)
840 {
841   Item_subselect *subq_predicate= subq_select_lex->master_unit()->item;
842   /*
843     The removal should happen for IN, ALL, ANY and EXISTS subqueries,
844     which means all but single row subqueries. Example single row
845     subqueries:
846        a) SELECT * FROM t1 WHERE t1.a = (<single row subquery>)
847        b) SELECT a, (<single row subquery) FROM t1
848    */
849   if (subq_predicate->substype() == Item_subselect::SINGLEROW_SUBS)
850     return;
851 
852   // A subquery that is not single row should be one of IN/ALL/ANY/EXISTS.
853   DBUG_ASSERT (subq_predicate->substype() == Item_subselect::EXISTS_SUBS ||
854                subq_predicate->substype() == Item_subselect::IN_SUBS     ||
855                subq_predicate->substype() == Item_subselect::ALL_SUBS    ||
856                subq_predicate->substype() == Item_subselect::ANY_SUBS);
857 
858   enum change
859   {
860     REMOVE_NONE=0,
861     REMOVE_ORDER= 1 << 0,
862     REMOVE_DISTINCT= 1 << 1,
863     REMOVE_GROUP= 1 << 2
864   };
865 
866   uint changelog= 0;
867 
868   if (subq_select_lex->order_list.elements)
869   {
870     changelog|= REMOVE_ORDER;
871     for (ORDER *o= subq_select_lex->order_list.first; o != NULL; o= o->next)
872     {
873       if (*o->item == o->item_ptr)
874         (*o->item)->walk(&Item::clean_up_after_removal, true,
875                       static_cast<uchar*>(static_cast<void*>(subq_select_lex)));
876     }
877     subq_select_lex->join->order= NULL;
878     subq_select_lex->order_list.empty();
879     while (hidden_order_field_count-- > 0)
880     {
881       fields.pop();
882       ref_pointer_array[fields.elements]= NULL;
883     }
884   }
885 
886   if (subq_select_lex->options & SELECT_DISTINCT)
887   {
888     changelog|= REMOVE_DISTINCT;
889     subq_select_lex->join->select_distinct= false;
890     subq_select_lex->options&= ~SELECT_DISTINCT;
891   }
892 
893   /*
894     Remove GROUP BY if there are no aggregate functions and no HAVING
895     clause
896   */
897   if (subq_select_lex->group_list.elements &&
898       !subq_select_lex->with_sum_func && !subq_select_lex->join->having)
899   {
900     changelog|= REMOVE_GROUP;
901     for (ORDER *g= subq_select_lex->group_list.first; g != NULL; g= g->next)
902     {
903       if (*g->item == g->item_ptr)
904         (*g->item)->walk(&Item::clean_up_after_removal, true,
905                       static_cast<uchar*>(static_cast<void*>(subq_select_lex)));
906     }
907     subq_select_lex->join->group_list= NULL;
908     subq_select_lex->group_list.empty();
909     while (hidden_group_field_count-- > 0)
910     {
911       fields.pop();
912       ref_pointer_array[fields.elements]= NULL;
913     }
914   }
915 
916   if (changelog)
917   {
918     Opt_trace_context * trace= &subq_select_lex->join->thd->opt_trace;
919     if (unlikely(trace->is_started()))
920     {
921       Opt_trace_object trace_wrapper(trace);
922       Opt_trace_array trace_changes(trace, "transformations_to_subquery");
923       if (changelog & REMOVE_ORDER)
924         trace_changes.add_alnum("removed_ordering");
925       if (changelog & REMOVE_DISTINCT)
926         trace_changes.add_alnum("removed_distinct");
927       if (changelog & REMOVE_GROUP)
928         trace_changes.add_alnum("removed_grouping");
929     }
930   }
931 }
932 
933 
934 /**
935   Function to setup clauses without sum functions.
936 */
937 static inline int
setup_without_group(THD * thd,Ref_ptr_array ref_pointer_array,TABLE_LIST * tables,TABLE_LIST * leaves,List<Item> & fields,List<Item> & all_fields,Item ** conds,ORDER * order,ORDER * group,int * hidden_group_field_count,int * hidden_order_field_count)938 setup_without_group(THD *thd, Ref_ptr_array ref_pointer_array,
939                     TABLE_LIST *tables,
940                     TABLE_LIST *leaves,
941                     List<Item> &fields,
942                     List<Item> &all_fields,
943                     Item **conds,
944                     ORDER *order,
945                     ORDER *group,
946                     int *hidden_group_field_count,
947                     int *hidden_order_field_count)
948 {
949   int res;
950   st_select_lex *const select= thd->lex->current_select;
951   nesting_map save_allow_sum_func=thd->lex->allow_sum_func;
952   /*
953     Need to save the value, so we can turn off only any new non_agg_field_used
954     additions coming from the WHERE
955   */
956   const bool saved_non_agg_field_used= select->non_agg_field_used();
957   DBUG_ENTER("setup_without_group");
958 
959   thd->lex->allow_sum_func&= ~((nesting_map)1 << select->nest_level);
960   res= setup_conds(thd, tables, leaves, conds);
961 
962   /* it's not wrong to have non-aggregated columns in a WHERE */
963   select->set_non_agg_field_used(saved_non_agg_field_used);
964 
965   // GROUP BY
966   int all_fields_count= all_fields.elements;
967   res= res || setup_group(thd, ref_pointer_array, tables, fields, all_fields,
968                           group);
969   *hidden_group_field_count= all_fields.elements - all_fields_count;
970 
971   // ORDER BY
972   all_fields_count= all_fields.elements;
973   thd->lex->allow_sum_func|= (nesting_map)1 << select->nest_level;
974   res= res || setup_order(thd, ref_pointer_array, tables, fields, all_fields,
975                           order);
976   *hidden_order_field_count= all_fields.elements - all_fields_count;
977 
978   res= res || match_exprs_for_only_full_group_by(thd, all_fields,
979                                                  *hidden_group_field_count,
980                                                  *hidden_order_field_count,
981                                                  fields.elements, group);
982 
983   thd->lex->allow_sum_func= save_allow_sum_func;
984   DBUG_RETURN(res);
985 }
986 
987 
988 /*****************************************************************************
989   Group and order functions
990 *****************************************************************************/
991 
992 /**
993   Resolve an ORDER BY or GROUP BY column reference.
994 
995   Given a column reference (represented by 'order') from a GROUP BY or ORDER
996   BY clause, find the actual column it represents. If the column being
997   resolved is from the GROUP BY clause, the procedure searches the SELECT
998   list 'fields' and the columns in the FROM list 'tables'. If 'order' is from
999   the ORDER BY clause, only the SELECT list is being searched.
1000 
1001   If 'order' is resolved to an Item, then order->item is set to the found
1002   Item. If there is no item for the found column (that is, it was resolved
1003   into a table field), order->item is 'fixed' and is added to all_fields and
1004   ref_pointer_array.
1005 
1006   ref_pointer_array and all_fields are updated.
1007 
1008   @param[in] thd		     Pointer to current thread structure
1009   @param[in,out] ref_pointer_array  All select, group and order by fields
1010   @param[in] tables                 List of tables to search in (usually
1011     FROM clause)
1012   @param[in] order                  Column reference to be resolved
1013   @param[in] fields                 List of fields to search in (usually
1014     SELECT list)
1015   @param[in,out] all_fields         All select, group and order by fields
1016   @param[in] is_group_field         True if order is a GROUP field, false if
1017     ORDER by field
1018 
1019   @retval
1020     FALSE if OK
1021   @retval
1022     TRUE  if error occurred
1023 */
1024 
1025 static bool
find_order_in_list(THD * thd,Ref_ptr_array ref_pointer_array,TABLE_LIST * tables,ORDER * order,List<Item> & fields,List<Item> & all_fields,bool is_group_field)1026 find_order_in_list(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables,
1027                    ORDER *order, List<Item> &fields, List<Item> &all_fields,
1028                    bool is_group_field)
1029 {
1030   Item *order_item= *order->item; /* The item from the GROUP/ORDER caluse. */
1031   Item::Type order_item_type;
1032   Item **select_item; /* The corresponding item from the SELECT clause. */
1033   Field *from_field;  /* The corresponding field from the FROM clause. */
1034   uint counter;
1035   enum_resolution_type resolution;
1036 
1037   /*
1038     Local SP variables may be int but are expressions, not positions.
1039     (And they can't be used before fix_fields is called for them).
1040   */
1041   if (order_item->type() == Item::INT_ITEM && order_item->basic_const_item())
1042   {						/* Order by position */
1043     uint count= (uint) order_item->val_int();
1044     if (!count || count > fields.elements)
1045     {
1046       my_error(ER_BAD_FIELD_ERROR, MYF(0),
1047                order_item->full_name(), thd->where);
1048       return TRUE;
1049     }
1050     order->item= &ref_pointer_array[count - 1];
1051     order->in_field_list= 1;
1052     order->counter= count;
1053     order->counter_used= 1;
1054     return FALSE;
1055   }
1056   /* Lookup the current GROUP/ORDER field in the SELECT clause. */
1057   select_item= find_item_in_list(order_item, fields, &counter,
1058                                  REPORT_EXCEPT_NOT_FOUND, &resolution);
1059   if (!select_item)
1060     return TRUE; /* The item is not unique, or some other error occured. */
1061 
1062 
1063   /* Check whether the resolved field is not ambiguos. */
1064   if (select_item != not_found_item)
1065   {
1066     Item *view_ref= NULL;
1067     /*
1068       If we have found field not by its alias in select list but by its
1069       original field name, we should additionaly check if we have conflict
1070       for this name (in case if we would perform lookup in all tables).
1071     */
1072     if (resolution == RESOLVED_BEHIND_ALIAS && !order_item->fixed &&
1073         order_item->fix_fields(thd, order->item))
1074       return TRUE;
1075 
1076     /* Lookup the current GROUP field in the FROM clause. */
1077     order_item_type= order_item->type();
1078     from_field= (Field*) not_found_field;
1079     if ((is_group_field &&
1080         order_item_type == Item::FIELD_ITEM) ||
1081         order_item_type == Item::REF_ITEM)
1082     {
1083       from_field= find_field_in_tables(thd, (Item_ident*) order_item, tables,
1084                                        NULL, &view_ref, IGNORE_ERRORS, TRUE,
1085                                        FALSE);
1086       if (!from_field)
1087         from_field= (Field*) not_found_field;
1088     }
1089 
1090     if (from_field == not_found_field ||
1091         (from_field != view_ref_found ?
1092          /* it is field of base table => check that fields are same */
1093          ((*select_item)->type() == Item::FIELD_ITEM &&
1094           ((Item_field*) (*select_item))->field->eq(from_field)) :
1095          /*
1096            in is field of view table => check that references on translation
1097            table are same
1098          */
1099          ((*select_item)->type() == Item::REF_ITEM &&
1100           view_ref->type() == Item::REF_ITEM &&
1101           ((Item_ref *) (*select_item))->ref ==
1102           ((Item_ref *) view_ref)->ref)))
1103     {
1104       /*
1105         If there is no such field in the FROM clause, or it is the same field
1106         as the one found in the SELECT clause, then use the Item created for
1107         the SELECT field. As a result if there was a derived field that
1108         'shadowed' a table field with the same name, the table field will be
1109         chosen over the derived field.
1110 
1111         If we replace *order->item with one from the select list or
1112         from a table in the FROM list, we should clean up after
1113         removing the old *order->item from the query. The item has not
1114         been fixed (so there are no aggregation functions that need
1115         cleaning up), but it may contain subqueries that should be
1116         unlinked.
1117       */
1118       if (*order->item != *select_item)
1119         (*order->item)->walk(&Item::clean_up_after_removal, true, NULL);
1120       order->item= &ref_pointer_array[counter];
1121       order->in_field_list=1;
1122       if (resolution == RESOLVED_AGAINST_ALIAS)
1123         order->used_alias= true;
1124       return FALSE;
1125     }
1126     else
1127     {
1128       /*
1129         There is a field with the same name in the FROM clause. This
1130         is the field that will be chosen. In this case we issue a
1131         warning so the user knows that the field from the FROM clause
1132         overshadows the column reference from the SELECT list.
1133       */
1134       push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, ER_NON_UNIQ_ERROR,
1135                           ER(ER_NON_UNIQ_ERROR),
1136                           ((Item_ident*) order_item)->field_name,
1137                           current_thd->where);
1138     }
1139   }
1140 
1141   order->in_field_list=0;
1142   /*
1143     The call to order_item->fix_fields() means that here we resolve
1144     'order_item' to a column from a table in the list 'tables', or to
1145     a column in some outer query. Exactly because of the second case
1146     we come to this point even if (select_item == not_found_item),
1147     inspite of that fix_fields() calls find_item_in_list() one more
1148     time.
1149 
1150     We check order_item->fixed because Item_func_group_concat can put
1151     arguments for which fix_fields already was called.
1152 
1153     group_fix_field= TRUE is to resolve aliases from the SELECT list
1154     without creating of Item_ref-s: JOIN::exec() wraps aliased items
1155     in SELECT list with Item_copy items. To re-evaluate such a tree
1156     that includes Item_copy items we have to refresh Item_copy caches,
1157     but:
1158       - filesort() never refresh Item_copy items,
1159       - end_send_group() checks every record for group boundary by the
1160         test_if_group_changed function that obtain data from these
1161         Item_copy items, but the copy_fields function that
1162         refreshes Item copy items is called after group boundaries only -
1163         that is a vicious circle.
1164     So we prevent inclusion of Item_copy items.
1165   */
1166   bool save_group_fix_field= thd->lex->current_select->group_fix_field;
1167   if (is_group_field)
1168     thd->lex->current_select->group_fix_field= TRUE;
1169   bool ret= (!order_item->fixed &&
1170       (order_item->fix_fields(thd, order->item) ||
1171        (order_item= *order->item)->check_cols(1) ||
1172        thd->is_fatal_error));
1173   thd->lex->current_select->group_fix_field= save_group_fix_field;
1174   if (ret)
1175     return TRUE; /* Wrong field. */
1176 
1177   uint el= all_fields.elements;
1178   all_fields.push_front(order_item); /* Add new field to field list. */
1179   ref_pointer_array[el]= order_item;
1180   /*
1181     If the order_item is a SUM_FUNC_ITEM, when fix_fields is called
1182     ref_by is set to order->item which is the address of order_item.
1183     But this needs to be address of order_item in the all_fields list.
1184     As a result, when it gets replaced with Item_aggregate_ref
1185     object in Item::split_sum_func2, we will be able to retrieve the
1186     newly created object.
1187   */
1188   if (order_item->type() == Item::SUM_FUNC_ITEM)
1189     ((Item_sum *)order_item)->ref_by= all_fields.head_ref();
1190 
1191   /*
1192     Currently, we assume that this assertion holds. If it turns out
1193     that it fails for some query, order->item has changed and the old
1194     item is removed from the query. In that case, we must call walk()
1195     with clean_up_after_removal() on the old order->item.
1196   */
1197   DBUG_ASSERT(order_item == *order->item);
1198   order->item= &ref_pointer_array[el];
1199   return FALSE;
1200 }
1201 
1202 
1203 /**
1204   Change order to point at item in select list.
1205 
1206   If item isn't a number and doesn't exists in the select list, add it to the
1207   the field list.
1208 */
1209 
setup_order(THD * thd,Ref_ptr_array ref_pointer_array,TABLE_LIST * tables,List<Item> & fields,List<Item> & all_fields,ORDER * order)1210 int setup_order(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables,
1211 		List<Item> &fields, List<Item> &all_fields, ORDER *order)
1212 {
1213   thd->where="order clause";
1214   DBUG_ASSERT(thd->lex->current_select->cur_pos_in_all_fields ==
1215               SELECT_LEX::ALL_FIELDS_UNDEF_POS);
1216   for (; order; order=order->next)
1217   {
1218     thd->lex->current_select->cur_pos_in_all_fields=
1219       fields.elements - all_fields.elements - 1;
1220     if (find_order_in_list(thd, ref_pointer_array, tables, order, fields,
1221 			   all_fields, FALSE))
1222       return 1;
1223   }
1224   thd->lex->current_select->cur_pos_in_all_fields=
1225 		SELECT_LEX::ALL_FIELDS_UNDEF_POS;
1226   return 0;
1227 }
1228 
1229 
1230 /**
1231    Scans the SELECT list and ORDER BY list: for each expression, if it is not
1232    present in GROUP BY, examines the non-aggregated columns contained in the
1233    expression; if those columns are not all in GROUP BY, raise an error.
1234 
1235    Examples:
1236    1) "SELECT a+1 FROM t GROUP BY a+1"
1237    "a+1" in SELECT list was found, by setup_group() (exactly
1238    find_order_in_list()), to be the same as "a+1" in GROUP BY; as it is a
1239    GROUP BY expression, setup_group() has marked this expression with
1240    ALL_FIELDS_UNDEF_POS (item->marker= ALL_FIELDS_UNDEF_POS).
1241    2) "SELECT a+1 FROM t GROUP BY a"
1242    "a+1" is not found in GROUP BY; its non-aggregated column is "a", "a" is
1243    present in GROUP BY so it's ok.
1244 
1245    A "hidden" GROUP BY / ORDER BY expression is a member of GROUP BY / ORDER
1246    BY which was not found (by setup_order() or setup_group()) to be also
1247    present in the SELECT list. setup_order() and setup_group() have thus added
1248    the expression to the front of JOIN::all_fields.
1249 
1250    @param  thd                     THD pointer
1251    @param  all_fields              list of expressions, including SELECT list
1252                                    and hidden ORDER BY expressions
1253    @param  hidden_group_exprs_count number of hidden GROUP BY expressions
1254    @param  hidden_order_exprs_count number of hidden ORDER BY expressions
1255    @param  select_exprs_count      number of SELECT list expressions (there may
1256                                    be a gap between hidden GROUP BY expressions
1257                                    and SELECT list expressions)
1258    @param  group_exprs             GROUP BY expressions
1259 
1260    @returns true if ONLY_FULL_GROUP_BY is violated.
1261 */
1262 
1263 static bool
match_exprs_for_only_full_group_by(THD * thd,List<Item> & all_fields,int hidden_group_exprs_count,int hidden_order_exprs_count,int select_exprs_count,ORDER * group_exprs)1264 match_exprs_for_only_full_group_by(THD *thd, List<Item> &all_fields,
1265                                    int hidden_group_exprs_count,
1266                                    int hidden_order_exprs_count,
1267                                    int select_exprs_count,
1268                                    ORDER *group_exprs)
1269 {
1270   if (!group_exprs ||
1271       !(thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY))
1272     return false;
1273 
1274   /*
1275     For all expressions of the SELECT list and ORDER BY, a list of columns
1276     which aren't under an aggregate function, 'select_lex->non_agg_fields',
1277     has been created (see Item_field::fix_fields()). Each column in that list
1278     keeps in Item::marker the position, in all_fields, of the (SELECT
1279     list or ORDER BY) expression which it belongs to (see
1280     st_select_lex::cur_pos_in_all_fields). all_fields looks like this:
1281     (front) HIDDEN ORDER BY - HIDDEN GROUP BY - gap - SELECT LIST (back)
1282     "Gap" may contain some aggregate expressions (see Item::split_sum_func2())
1283     which are irrelevant to us.
1284 
1285     We take an expressions of the SELECT list or a hidden ORDER BY expression
1286     ('expr' variable).
1287     - (1) If it also belongs to the GROUP BY list, it's ok.
1288     - (2) If it is an aggregate function, it's ok.
1289     - (3) If is is a constant, it's ok.
1290     - (4) If it is a column resolved to an outer SELECT it's ok;
1291     indeed, it is a constant from the point of view of one execution of the
1292     inner SELECT - it does not introduce any randomness in the result.
1293     - Otherwise we scan the list of non-aggregated columns and if we find at
1294     least one column belonging to this expression and NOT occuring
1295     in the GROUP BY list, we throw an error.
1296   */
1297   List_iterator<Item> exprs_it(all_fields);
1298   /*
1299     All "idx*" variables below are indices in all_fields, with "index of
1300     front" = 0 and "index of back" = all_fields.elements - 1.
1301   */
1302   int idx= -1;
1303   const int idx_of_first_hidden_group= hidden_order_exprs_count;
1304   const int idx_of_first_select= all_fields.elements - select_exprs_count;
1305   /*
1306     Also an index in all_fields, but with the same counting convention as
1307     st_select_lex::cur_pos_in_all_fields.
1308   */
1309   int cur_pos_in_all_fields;
1310   Item *expr;
1311   Item_field *non_agg_field;
1312   List_iterator<Item_field>
1313     non_agg_fields_it(thd->lex->current_select->non_agg_fields);
1314 
1315   non_agg_field= non_agg_fields_it++;
1316   while (non_agg_field && (expr= exprs_it++))
1317   {
1318     idx++;
1319     if (idx >= idx_of_first_hidden_group &&     // In or after hidden GROUP BY
1320          idx < idx_of_first_select)             // but not yet in SELECT list
1321       continue;
1322     cur_pos_in_all_fields= idx - idx_of_first_select;
1323 
1324     if ((expr->marker == SELECT_LEX::ALL_FIELDS_UNDEF_POS) ||  // (1)
1325         expr->type() == Item::SUM_FUNC_ITEM ||                 // (2)
1326         expr->const_item() ||                                  // (3)
1327         (expr->real_item()->type() == Item::FIELD_ITEM &&
1328          expr->used_tables() & OUTER_REF_TABLE_BIT))           // (4)
1329       continue; // Ignore this expression.
1330 
1331     while (non_agg_field)
1332     {
1333       /*
1334         All non-aggregated columns contained in 'expr' have their
1335         'marker' equal to 'cur_pos_in_all_fields' OR equal to
1336         ALL_FIELDS_UNDEF_POS. The latter case happens in:
1337         "SELECT a FROM t GROUP BY a"
1338         when setup_group() finds that "a" in GROUP BY is also in the
1339         SELECT list ('fields' list); setup_group() marks the "a" expression
1340         with ALL_FIELDS_UNDEF_POS; at the same time, "a" is also a
1341         non-aggregated column of the "a" expression; thus, non-aggregated
1342         column "a" had its marker change from >=0 to
1343         ALL_FIELDS_UNDEF_POS. Such non-aggregated column can be ignored (and
1344         that is why ALL_FIELDS_UNDEF_POS is a very negative number).
1345       */
1346       if (non_agg_field->marker < cur_pos_in_all_fields)
1347       {
1348         /*
1349           Ignorable column, or the owning expression was found to be
1350           ignorable (cases 1-2-3-4 above); ignore it and switch to next
1351           column.
1352         */
1353         goto next_non_agg_field;
1354       }
1355       if (non_agg_field->marker > cur_pos_in_all_fields)
1356       {
1357         /*
1358           'expr' has been passed (we have scanned all its non-aggregated
1359           columns and are seeing one which belongs to a next expression),
1360           switch to next expression.
1361         */
1362         break;
1363       }
1364       // Check whether the non-aggregated column occurs in the GROUP BY list
1365       for (ORDER *grp= group_exprs; grp; grp= grp->next)
1366         if ((*grp->item)->eq(static_cast<Item *>(non_agg_field), false))
1367         {
1368           // column is in GROUP BY so is ok; check the next
1369           goto next_non_agg_field;
1370         }
1371       /*
1372         If we come here, one non-aggregated column belonging to 'expr' was
1373         not found in GROUP BY, we raise an error.
1374         TODO: change ER_WRONG_FIELD_WITH_GROUP to more detailed
1375         ER_NON_GROUPING_FIELD_USED
1376       */
1377       my_error(ER_WRONG_FIELD_WITH_GROUP, MYF(0), non_agg_field->full_name());
1378       return true;
1379   next_non_agg_field:
1380       non_agg_field= non_agg_fields_it++;
1381     }
1382   }
1383   return false;
1384 }
1385 
1386 
1387 /**
1388   Initialize the GROUP BY list.
1389 
1390   @param thd			Thread handler
1391   @param ref_pointer_array	We store references to all fields that was
1392                                not in 'fields' here.
1393   @param fields		All fields in the select part. Any item in
1394                                'order' that is part of these list is replaced
1395                                by a pointer to this fields.
1396   @param all_fields		Total list of all unique fields used by the
1397                                select. All items in 'order' that was not part
1398                                of fields will be added first to this list.
1399   @param order			The fields we should do GROUP BY on.
1400 
1401   @todo
1402     change ER_WRONG_FIELD_WITH_GROUP to more detailed
1403     ER_NON_GROUPING_FIELD_USED
1404 
1405   @retval
1406     0  ok
1407   @retval
1408     1  error (probably out of memory)
1409 */
1410 
1411 static int
setup_group(THD * thd,Ref_ptr_array ref_pointer_array,TABLE_LIST * tables,List<Item> & fields,List<Item> & all_fields,ORDER * order)1412 setup_group(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables,
1413             List<Item> &fields, List<Item> &all_fields, ORDER *order)
1414 {
1415   if (!order)
1416     return 0;				/* Everything is ok */
1417 
1418   thd->where="group statement";
1419   for (ORDER *ord= order; ord; ord= ord->next)
1420   {
1421     if (find_order_in_list(thd, ref_pointer_array, tables, ord, fields,
1422 			   all_fields, TRUE))
1423       return 1;
1424     // ONLY_FULL_GROUP_BY needn't verify this expression:
1425     (*ord->item)->marker= SELECT_LEX::ALL_FIELDS_UNDEF_POS;
1426     if ((*ord->item)->with_sum_func)
1427     {
1428       my_error(ER_WRONG_GROUP_FIELD, MYF(0), (*ord->item)->full_name());
1429       return 1;
1430     }
1431   }
1432   return 0;
1433 }
1434 
1435 
1436 /****************************************************************************
1437   ROLLUP handling
1438 ****************************************************************************/
1439 
1440 /**
1441   Replace occurences of group by fields in an expression by ref items.
1442 
1443   The function replaces occurrences of group by fields in expr
1444   by ref objects for these fields unless they are under aggregate
1445   functions.
1446   The function also corrects value of the the maybe_null attribute
1447   for the items of all subexpressions containing group by fields.
1448 
1449   @b EXAMPLES
1450     @code
1451       SELECT a+1 FROM t1 GROUP BY a WITH ROLLUP
1452       SELECT SUM(a)+a FROM t1 GROUP BY a WITH ROLLUP
1453   @endcode
1454 
1455   @b IMPLEMENTATION
1456 
1457     The function recursively traverses the tree of the expr expression,
1458     looks for occurrences of the group by fields that are not under
1459     aggregate functions and replaces them for the corresponding ref items.
1460 
1461   @note
1462     This substitution is needed GROUP BY queries with ROLLUP if
1463     SELECT list contains expressions over group by attributes.
1464 
1465   @param thd                  reference to the context
1466   @param expr                 expression to make replacement
1467   @param group_list           list of references to group by items
1468   @param changed        out:  returns 1 if item contains a replaced field item
1469 
1470   @todo
1471     - TODO: Some functions are not null-preserving. For those functions
1472     updating of the maybe_null attribute is an overkill.
1473 
1474   @retval
1475     0	if ok
1476   @retval
1477     1   on error
1478 */
1479 
change_group_ref(THD * thd,Item_func * expr,ORDER * group_list,bool * changed)1480 static bool change_group_ref(THD *thd, Item_func *expr, ORDER *group_list,
1481                              bool *changed)
1482 {
1483   if (expr->arg_count)
1484   {
1485     Name_resolution_context *context= &thd->lex->current_select->context;
1486     Item **arg,**arg_end;
1487     bool arg_changed= FALSE;
1488     for (arg= expr->arguments(),
1489          arg_end= expr->arguments()+expr->arg_count;
1490          arg != arg_end; arg++)
1491     {
1492       Item *item= *arg;
1493       if (item->type() == Item::FIELD_ITEM || item->type() == Item::REF_ITEM)
1494       {
1495         ORDER *group_tmp;
1496         for (group_tmp= group_list; group_tmp; group_tmp= group_tmp->next)
1497         {
1498           if (item->eq(*group_tmp->item,0))
1499           {
1500             Item *new_item;
1501             if (!(new_item= new Item_ref(context, group_tmp->item, 0,
1502                                         item->item_name.ptr())))
1503               return 1;                                 // fatal_error is set
1504             thd->change_item_tree(arg, new_item);
1505             arg_changed= TRUE;
1506           }
1507         }
1508       }
1509       else if (item->type() == Item::FUNC_ITEM)
1510       {
1511         if (change_group_ref(thd, (Item_func *) item, group_list, &arg_changed))
1512           return 1;
1513       }
1514     }
1515     if (arg_changed)
1516     {
1517       expr->maybe_null= 1;
1518       *changed= TRUE;
1519     }
1520   }
1521   return 0;
1522 }
1523 
1524 
1525 /** Allocate memory needed for other rollup functions. */
1526 
rollup_init()1527 bool JOIN::rollup_init()
1528 {
1529   uint i,j;
1530   Item **ref_array;
1531   ORDER *group_tmp;
1532 
1533   tmp_table_param.quick_group= 0;	// Can't create groups in tmp table
1534   rollup.state= ROLLUP::STATE_INITED;
1535 
1536   /*
1537     Create pointers to the different sum function groups
1538     These are updated by rollup_make_fields()
1539   */
1540   tmp_table_param.group_parts= send_group_parts;
1541 
1542   Item_null_result **null_items=
1543     static_cast<Item_null_result**>(thd->alloc(sizeof(Item*)*send_group_parts));
1544 
1545   rollup.null_items= Item_null_array(null_items, send_group_parts);
1546   rollup.ref_pointer_arrays=
1547     static_cast<Ref_ptr_array*>
1548     (thd->alloc((sizeof(Ref_ptr_array) +
1549                  all_fields.elements * sizeof(Item*)) * send_group_parts));
1550   rollup.fields=
1551     static_cast<List<Item>*>(thd->alloc(sizeof(List<Item>) * send_group_parts));
1552 
1553   if (!null_items || !rollup.ref_pointer_arrays || !rollup.fields)
1554     return true;
1555 
1556   ref_array= (Item**) (rollup.ref_pointer_arrays+send_group_parts);
1557 
1558   /*
1559     Prepare space for field list for the different levels
1560     These will be filled up in rollup_make_fields()
1561   */
1562   group_tmp= group_list;
1563   for (i= 0 ; i < send_group_parts ; i++)
1564   {
1565     rollup.null_items[i]=
1566       new (thd->mem_root) Item_null_result((*group_tmp->item)->field_type(),
1567                                            (*group_tmp->item)->result_type());
1568     List<Item> *rollup_fields= &rollup.fields[i];
1569     rollup_fields->empty();
1570     rollup.ref_pointer_arrays[i]= Ref_ptr_array(ref_array, all_fields.elements);
1571     ref_array+= all_fields.elements;
1572     group_tmp= group_tmp->next;
1573   }
1574   for (i= 0 ; i < send_group_parts; i++)
1575   {
1576     for (j=0 ; j < fields_list.elements ; j++)
1577       rollup.fields[i].push_back(rollup.null_items[i]);
1578   }
1579   List_iterator<Item> it(all_fields);
1580   Item *item;
1581   while ((item= it++))
1582   {
1583     bool found_in_group= 0;
1584 
1585     for (group_tmp= group_list; group_tmp; group_tmp= group_tmp->next)
1586     {
1587       if (*group_tmp->item == item)
1588       {
1589         item->maybe_null= 1;
1590         found_in_group= 1;
1591         break;
1592       }
1593     }
1594     if (item->type() == Item::FUNC_ITEM && !found_in_group)
1595     {
1596       bool changed= FALSE;
1597       if (change_group_ref(thd, (Item_func *) item, group_list, &changed))
1598         return 1;
1599       /*
1600         We have to prevent creation of a field in a temporary table for
1601         an expression that contains GROUP BY attributes.
1602         Marking the expression item as 'with_sum_func' will ensure this.
1603       */
1604       if (changed)
1605         item->with_sum_func= 1;
1606     }
1607   }
1608   return 0;
1609 }
1610 
1611 /**
1612   @} (end of group Query_Resolver)
1613 */
1614