1 /* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23 /**
24 @file
25
26 @brief
27 Implementation of name resolution stage
28
29
30 @defgroup Query_Resolver Query Resolver
31 @{
32 */
33
34 #include "sql_select.h"
35 #include "sql_resolver.h"
36 #include "sql_optimizer.h"
37 #include "opt_trace.h"
38 #include "sql_base.h"
39 #include "sql_acl.h"
40 #include "opt_explain_format.h"
41
42 static void remove_redundant_subquery_clauses(st_select_lex *subq_select_lex,
43 int hidden_group_field_count,
44 int hidden_order_field_count,
45 List<Item> &fields,
46 Ref_ptr_array ref_pointer_array);
47 static inline int
48 setup_without_group(THD *thd, Ref_ptr_array ref_pointer_array,
49 TABLE_LIST *tables,
50 TABLE_LIST *leaves,
51 List<Item> &fields,
52 List<Item> &all_fields,
53 Item **conds,
54 ORDER *order,
55 ORDER *group,
56 int *hidden_group_field_count,
57 int *hidden_order_field_count);
58 static bool resolve_subquery(THD *thd, JOIN *join);
59 static int
60 setup_group(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables,
61 List<Item> &fields, List<Item> &all_fields, ORDER *order);
62 static bool
63 match_exprs_for_only_full_group_by(THD *thd, List<Item> &all_fields,
64 int hidden_group_exprs_count,
65 int hidden_order_exprs_count,
66 int select_exprs_count,
67 ORDER *group_exprs);
68
69
70 /**
71 Prepare of whole select (including sub queries in future).
72
73 @todo
74 Add check of calculation of GROUP functions and fields:
75 SELECT COUNT(*)+table.col1 from table1;
76
77 @retval
78 -1 on error
79 @retval
80 0 on success
81 */
82 int
prepare(TABLE_LIST * tables_init,uint wild_num,Item * conds_init,uint og_num,ORDER * order_init,ORDER * group_init,Item * having_init,SELECT_LEX * select_lex_arg,SELECT_LEX_UNIT * unit_arg)83 JOIN::prepare(TABLE_LIST *tables_init,
84 uint wild_num, Item *conds_init, uint og_num,
85 ORDER *order_init, ORDER *group_init,
86 Item *having_init,
87 SELECT_LEX *select_lex_arg,
88 SELECT_LEX_UNIT *unit_arg)
89 {
90 DBUG_ENTER("JOIN::prepare");
91
92 // to prevent double initialization on EXPLAIN
93 if (optimized)
94 DBUG_RETURN(0);
95
96 // We may do subquery transformation, or Item substitution:
97 Prepare_error_tracker tracker(thd);
98
99 if (order_init)
100 explain_flags.set(ESC_ORDER_BY, ESP_EXISTS);
101 if (group_init)
102 explain_flags.set(ESC_GROUP_BY, ESP_EXISTS);
103 if (select_options & SELECT_DISTINCT)
104 explain_flags.set(ESC_DISTINCT, ESP_EXISTS);
105
106 conds= conds_init;
107 order= ORDER_with_src(order_init, ESC_ORDER_BY);
108 group_list= ORDER_with_src(group_init, ESC_GROUP_BY);
109 having= having_for_explain= having_init;
110 tables_list= tables_init;
111 DBUG_ASSERT(!unit_arg->cleaned);
112 select_lex= select_lex_arg;
113 select_lex->join= this;
114 join_list= &select_lex->top_join_list;
115 union_part= unit_arg->is_union();
116
117 thd->lex->current_select->is_item_list_lookup= 1;
118 /*
119 If we have already executed SELECT, then it have not sense to prevent
120 its table from update (see unique_table())
121 */
122 if (thd->derived_tables_processing)
123 select_lex->exclude_from_table_unique_test= TRUE;
124
125 Opt_trace_context * const trace= &thd->opt_trace;
126 Opt_trace_object trace_wrapper(trace);
127 Opt_trace_object trace_prepare(trace, "join_preparation");
128 trace_prepare.add_select_number(select_lex->select_number);
129 Opt_trace_array trace_steps(trace, "steps");
130
131 /* Check that all tables, fields, conds and order are ok */
132
133 if (!(select_options & OPTION_SETUP_TABLES_DONE) &&
134 setup_tables_and_check_access(thd, &select_lex->context, join_list,
135 tables_list, &select_lex->leaf_tables,
136 FALSE, SELECT_ACL, SELECT_ACL))
137 DBUG_RETURN(-1);
138
139 select_lex->derived_table_count= 0;
140 select_lex->materialized_table_count= 0;
141 select_lex->partitioned_table_count= 0;
142
143 TABLE_LIST *table_ptr;
144 for (table_ptr= select_lex->leaf_tables;
145 table_ptr;
146 table_ptr= table_ptr->next_leaf)
147 {
148 primary_tables++; // Count the primary input tables of the query
149 if (table_ptr->derived)
150 select_lex->derived_table_count++;
151 if (table_ptr->uses_materialization())
152 select_lex->materialized_table_count++;
153 #ifdef WITH_PARTITION_STORAGE_ENGINE
154 if (table_ptr->table->part_info)
155 select_lex->partitioned_table_count++;
156 #endif
157 }
158 tables= primary_tables; // This is currently the total number of tables
159
160 /*
161 Item and Item_field CTORs will both increment some counters
162 in current_select, based on the current parsing context.
163 We are not parsing anymore: any new Items created now are due to
164 query rewriting, so stop incrementing counters.
165 */
166 DBUG_ASSERT(select_lex->parsing_place == NO_MATTER);
167 select_lex->parsing_place= NO_MATTER;
168
169 if (setup_wild(thd, tables_list, fields_list, &all_fields, wild_num))
170 DBUG_RETURN(-1);
171 if (select_lex->setup_ref_array(thd, og_num))
172 DBUG_RETURN(-1);
173
174 ref_ptrs= ref_ptr_array_slice(0);
175
176 if (setup_fields(thd, ref_ptrs, fields_list, MARK_COLUMNS_READ,
177 &all_fields, 1))
178 DBUG_RETURN(-1);
179
180 int hidden_order_field_count;
181 if (setup_without_group(thd, ref_ptrs, tables_list,
182 select_lex->leaf_tables, fields_list,
183 all_fields, &conds, order, group_list,
184 &hidden_group_field_count,
185 &hidden_order_field_count))
186 DBUG_RETURN(-1);
187
188 /*
189 Permanently remove redundant parts from the query if
190 1) This is a subquery
191 2) This is the first time this query is optimized (since the
192 transformation is permanent)
193 3) Not normalizing a view. Removal should take place when a
194 query involving a view is optimized, not when the view
195 is created
196 */
197 if (select_lex->master_unit()->item && // 1)
198 select_lex->first_cond_optimization && // 2)
199 !(thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW)) // 3)
200 {
201 remove_redundant_subquery_clauses(select_lex, hidden_group_field_count,
202 hidden_order_field_count,
203 all_fields, ref_ptrs);
204 }
205
206 if (having)
207 {
208 nesting_map save_allow_sum_func= thd->lex->allow_sum_func;
209 thd->where="having clause";
210 thd->lex->allow_sum_func|= (nesting_map)1 << select_lex_arg->nest_level;
211 select_lex->having_fix_field= 1;
212 select_lex->resolve_place= st_select_lex::RESOLVE_HAVING;
213 bool having_fix_rc= (!having->fixed &&
214 (having->fix_fields(thd, &having) ||
215 having->check_cols(1)));
216 select_lex->having_fix_field= 0;
217 having_for_explain= select_lex->having= having;
218
219 select_lex->resolve_place= st_select_lex::RESOLVE_NONE;
220 if (having_fix_rc || thd->is_error())
221 DBUG_RETURN(-1); /* purecov: inspected */
222 thd->lex->allow_sum_func= save_allow_sum_func;
223 }
224
225 /*
226 Printing the expanded query should happen here and not elsewhere, because
227 when a view is merged (when the view is opened in open_tables()), the
228 parent query's select_lex does not yet contain a correct WHERE clause (it
229 misses the view's merged WHERE clause). This is corrected only just above,
230 in TABLE_LIST::prep_where(), called by
231 setup_without_group()->setup_conds().
232 We also have to wait for fix_fields() on HAVING, above.
233 At this stage, we also have properly set up Item_ref-s.
234 */
235 {
236 Opt_trace_object trace_wrapper(trace);
237 opt_trace_print_expanded_query(thd, select_lex, &trace_wrapper);
238 }
239
240 /*
241 When normalizing a view (like when writing a view's body to the FRM),
242 subquery transformations don't apply (if they did, IN->EXISTS could not be
243 undone in favour of materialization, when optimizing a later statement
244 using the view)
245 */
246 if (select_lex->master_unit()->item && // This is a subquery
247 // Not normalizing a view
248 !(thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW) &&
249 !(select_options & SELECT_DESCRIBE)) // Not within a describe
250 {
251 /* Join object is a subquery within an IN/ANY/ALL/EXISTS predicate */
252 if (resolve_subquery(thd, this))
253 DBUG_RETURN(-1);
254 }
255
256 select_lex->fix_prepare_information(thd, &conds, &having);
257
258 if (order)
259 {
260 bool real_order= FALSE;
261 ORDER *ord;
262 for (ord= order; ord; ord= ord->next)
263 {
264 Item *item= *ord->item;
265 /*
266 Disregard sort order if there's only
267 zero length NOT NULL fields (e.g. {VAR}CHAR(0) NOT NULL") or
268 zero length NOT NULL string functions there.
269 Such tuples don't contain any data to sort.
270 */
271 if (!real_order &&
272 /* Not a zero length NOT NULL field */
273 ((item->type() != Item::FIELD_ITEM ||
274 ((Item_field *) item)->field->maybe_null() ||
275 ((Item_field *) item)->field->sort_length()) &&
276 /* AND not a zero length NOT NULL string function. */
277 (item->type() != Item::FUNC_ITEM ||
278 item->maybe_null ||
279 item->result_type() != STRING_RESULT ||
280 item->max_length)))
281 real_order= TRUE;
282
283 if (item->with_sum_func && item->type() != Item::SUM_FUNC_ITEM)
284 item->split_sum_func(thd, ref_ptrs, all_fields);
285 }
286 if (!real_order)
287 order= NULL;
288 }
289
290 if (having && having->with_sum_func)
291 having->split_sum_func2(thd, ref_ptrs,
292 all_fields, &having, TRUE);
293 if (select_lex->inner_sum_func_list)
294 {
295 Item_sum *end=select_lex->inner_sum_func_list;
296 Item_sum *item_sum= end;
297 do
298 {
299 item_sum= item_sum->next;
300 item_sum->split_sum_func2(thd, ref_ptrs,
301 all_fields, item_sum->ref_by, FALSE);
302 } while (item_sum != end);
303 }
304
305 if (select_lex->inner_refs_list.elements &&
306 fix_inner_refs(thd, all_fields, select_lex, ref_ptrs,
307 group_list))
308 DBUG_RETURN(-1);
309
310 if (group_list)
311 {
312 /*
313 Because HEAP tables can't index BIT fields we need to use an
314 additional hidden field for grouping because later it will be
315 converted to a LONG field. Original field will remain of the
316 BIT type and will be returned to a client.
317 */
318 for (ORDER *ord= group_list; ord; ord= ord->next)
319 {
320 if ((*ord->item)->type() == Item::FIELD_ITEM &&
321 (*ord->item)->field_type() == MYSQL_TYPE_BIT)
322 {
323 Item_field *field= new Item_field(thd, *(Item_field**)ord->item);
324 int el= all_fields.elements;
325 ref_ptrs[el]= field;
326 all_fields.push_front(field);
327 ord->item= &ref_ptrs[el];
328 }
329 }
330 }
331
332 if (setup_ftfuncs(select_lex)) /* should be after having->fix_fields */
333 DBUG_RETURN(-1);
334
335
336 /*
337 Check if there are references to un-aggregated columns when computing
338 aggregate functions with implicit grouping (there is no GROUP BY).
339 */
340 if (thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY && !group_list &&
341 select_lex->non_agg_field_used() &&
342 select_lex->agg_func_used())
343 {
344 my_message(ER_MIX_OF_GROUP_FUNC_AND_FIELDS,
345 ER(ER_MIX_OF_GROUP_FUNC_AND_FIELDS), MYF(0));
346 DBUG_RETURN(-1);
347 }
348 {
349 /* Caclulate the number of groups */
350 send_group_parts= 0;
351 for (ORDER *group_tmp= group_list ; group_tmp ; group_tmp= group_tmp->next)
352 send_group_parts++;
353 }
354
355
356 if (result && result->prepare(fields_list, unit_arg))
357 DBUG_RETURN(-1); /* purecov: inspected */
358
359 /* Init join struct */
360 count_field_types(select_lex, &tmp_table_param, all_fields, 0);
361 this->group= group_list != 0;
362 unit= unit_arg;
363
364 if (tmp_table_param.sum_func_count && !group_list)
365 {
366 implicit_grouping= TRUE;
367 // Result will contain zero or one row - ordering is meaningless
368 order= NULL;
369 }
370
371 #ifdef RESTRICTED_GROUP
372 if (implicit_grouping)
373 {
374 my_message(ER_WRONG_SUM_SELECT,ER(ER_WRONG_SUM_SELECT),MYF(0));
375 DBUG_RETURN(-1); /* purecov: inspected */
376 }
377 #endif
378 if (select_lex->olap == ROLLUP_TYPE && rollup_init())
379 DBUG_RETURN(-1); /* purecov: inspected */
380 if (alloc_func_list())
381 DBUG_RETURN(-1); /* purecov: inspected */
382
383 #ifdef WITH_PARTITION_STORAGE_ENGINE
384 if (select_lex->partitioned_table_count)
385 {
386 for (TABLE_LIST *tbl= select_lex->leaf_tables; tbl; tbl= tbl->next_leaf)
387 {
388 /*
389 This will only prune constant conditions, which will be used for
390 lock pruning.
391 */
392 if (prune_partitions(thd, tbl->table,
393 tbl->join_cond() ? tbl->join_cond() : conds))
394 DBUG_RETURN(-1); /* purecov: inspected */
395 }
396 }
397 #endif
398
399 DBUG_RETURN(0); // All OK
400 }
401
402
403 /**
404 Check if the subquery predicate can be executed via materialization.
405
406 @param predicate IN subquery predicate
407 @param thd THD
408 @param select_lex SELECT_LEX of the subquery
409 @param outer Parent SELECT_LEX (outer to subquery)
410
411 @return TRUE if subquery allows materialization, FALSE otherwise.
412 */
413
subquery_allows_materialization(Item_in_subselect * predicate,THD * thd,SELECT_LEX * select_lex,const SELECT_LEX * outer)414 bool subquery_allows_materialization(Item_in_subselect *predicate,
415 THD *thd,
416 SELECT_LEX *select_lex,
417 const SELECT_LEX *outer)
418 {
419 bool has_nullables= false;
420 const uint elements= predicate->unit->first_select()->item_list.elements;
421 DBUG_ENTER("subquery_allows_materialization");
422 DBUG_ASSERT(elements >= 1);
423 DBUG_ASSERT(predicate->left_expr->cols() == elements);
424
425 OPT_TRACE_TRANSFORM(&thd->opt_trace, trace_wrapper, trace_mat,
426 select_lex->select_number,
427 "IN (SELECT)", "materialization");
428
429 const char *cause= NULL;
430 if (predicate->substype() != Item_subselect::IN_SUBS)
431 {
432 // Subq-mat cannot handle 'outer_expr > {ANY|ALL}(subq)'...
433 cause= "not an IN predicate";
434 }
435 else if (select_lex->is_part_of_union())
436 {
437 // Subquery must be a single query specification clause (not a UNION)
438 cause= "in UNION";
439 }
440 else if (!select_lex->master_unit()->first_select()->leaf_tables)
441 {
442 // Subquery has no tables, hence no point in materializing.
443 cause= "no inner tables";
444 }
445 else if (!outer->join)
446 {
447 /*
448 Maybe this is a subquery of a single table UPDATE/DELETE (TODO:
449 handle this by switching to multi-table UPDATE/DELETE).
450 */
451 cause= "parent query has no JOIN";
452 }
453 else if (!outer->leaf_tables)
454 {
455 // The upper query is SELECT ... FROM DUAL. No gain in materializing.
456 cause= "no tables in outer query";
457 }
458 else if (predicate->originally_dependent())
459 {
460 /*
461 Subquery should not be correlated; the correlation due to predicates
462 injected by IN->EXISTS does not count as we will remove them if we
463 choose materialization.
464
465 TODO:
466 This is an overly restrictive condition. It can be extended to:
467 (Subquery is non-correlated ||
468 Subquery is correlated to any query outer to IN predicate ||
469 (Subquery is correlated to the immediate outer query &&
470 Subquery !contains {GROUP BY, ORDER BY [LIMIT],
471 aggregate functions}) && subquery predicate is not under "NOT IN"))
472 */
473 cause= "correlated";
474 }
475 else
476 {
477 /*
478 Check that involved expression types allow materialization.
479 This is a temporary fix for BUG#36752; see bug report for
480 description of restrictions we need to put on the compared expressions.
481 */
482 DBUG_ASSERT(predicate->left_expr->fixed);
483 List_iterator<Item> it(predicate->unit->first_select()->item_list);
484
485 for (uint i= 0; i < elements; i++)
486 {
487 Item * const inner= it++;
488 Item * const outer= predicate->left_expr->element_index(i);
489 if (!types_allow_materialization(outer, inner))
490 {
491 cause= "type mismatch";
492 break;
493 }
494 if (inner->is_blob_field()) // 6
495 {
496 cause= "inner blob";
497 break;
498 }
499 has_nullables|= outer->maybe_null | inner->maybe_null;
500 }
501
502 if (!cause)
503 {
504 trace_mat.add("has_nullable_expressions", has_nullables);
505 /*
506 Subquery materialization cannot handle NULLs partial matching
507 properly, yet. If the outer or inner values are NULL, the
508 subselect_hash_sj_engine may reply FALSE when it should reply UNKNOWN.
509 So, we must limit it to those three cases:
510 - when FALSE and UNKNOWN are equivalent answers. I.e. this is a a
511 top-level predicate (this implies it is not negated).
512 - when outer and inner values cannot be NULL.
513 - when there is a single inner column (because for this we have a
514 limited implementation of NULLs partial matching).
515 */
516 const bool is_top_level= predicate->is_top_level_item();
517 trace_mat.add("treat_UNKNOWN_as_FALSE", is_top_level);
518
519 if (!is_top_level && has_nullables && (elements > 1))
520 cause= "cannot_handle_partial_matches";
521 else
522 {
523 trace_mat.add("possible", true);
524 DBUG_RETURN(TRUE);
525 }
526 }
527 }
528 DBUG_ASSERT(cause != NULL);
529 trace_mat.add("possible", false).add_alnum("cause", cause);
530 DBUG_RETURN(false);
531 }
532
533
534 /**
535 @brief Resolve predicate involving subquery
536
537 @param thd Pointer to THD.
538 @param join Join that is part of a subquery predicate.
539
540 @retval FALSE Success.
541 @retval TRUE Error.
542
543 @details
544 Perform early unconditional subquery transformations:
545 - Convert subquery predicate into semi-join, or
546 - Mark the subquery for execution using materialization, or
547 - Perform IN->EXISTS transformation, or
548 - Perform more/less ALL/ANY -> MIN/MAX rewrite
549 - Substitute trivial scalar-context subquery with its value
550
551 @todo for PS, make the whole block execute only on the first execution
552
553 */
554
resolve_subquery(THD * thd,JOIN * join)555 static bool resolve_subquery(THD *thd, JOIN *join)
556 {
557 DBUG_ENTER("resolve_subquery");
558
559 bool chose_semijoin= false;
560 SELECT_LEX *const select_lex= join->select_lex;
561 SELECT_LEX *const outer= select_lex->outer_select();
562
563 /*
564 @todo for PS, make the whole block execute only on the first execution.
565 resolve_subquery() is only invoked in the first execution for subqueries
566 that are transformed to semijoin, but for other subqueries, this function
567 is called for every execution. One solution is perhaps to define
568 exec_method in class Item_subselect and exit immediately if unequal to
569 EXEC_UNSPECIFIED.
570 */
571 Item_subselect *subq_predicate= select_lex->master_unit()->item;
572 DBUG_ASSERT(subq_predicate);
573 /**
574 @note
575 In this case: IN (SELECT ... UNION SELECT ...), JOIN::prepare() is
576 called for each of the two UNION members, and in those two calls,
577 subq_predicate is the same, not sure this is desired (double work?).
578 */
579
580 Item_in_subselect * const in_predicate=
581 (subq_predicate->substype() == Item_subselect::IN_SUBS) ?
582 static_cast<Item_in_subselect *>(subq_predicate) : NULL;
583
584 if (in_predicate)
585 {
586 DBUG_ASSERT(select_lex == thd->lex->current_select);
587 thd->lex->current_select= outer;
588 char const *save_where= thd->where;
589 thd->where= "IN/ALL/ANY subquery";
590
591 bool result= !in_predicate->left_expr->fixed &&
592 in_predicate->left_expr->fix_fields(thd,
593 &in_predicate->left_expr);
594 thd->lex->current_select= select_lex;
595 thd->where= save_where;
596 if (result)
597 DBUG_RETURN(TRUE); /* purecov: deadcode */
598
599 /*
600 Check if the left and right expressions have the same # of
601 columns, i.e. we don't have a case like
602 (oe1, oe2) IN (SELECT ie1, ie2, ie3 ...)
603
604 TODO why do we have this duplicated in IN->EXISTS transformers?
605 psergey-todo: fix these: grep for duplicated_subselect_card_check
606 */
607 if (select_lex->item_list.elements != in_predicate->left_expr->cols())
608 {
609 my_error(ER_OPERAND_COLUMNS, MYF(0), in_predicate->left_expr->cols());
610 DBUG_RETURN(TRUE);
611 }
612
613 }
614
615 DBUG_PRINT("info", ("Checking if subq can be converted to semi-join"));
616 /*
617 Check if we're in subquery that is a candidate for flattening into a
618 semi-join (which is done in flatten_subqueries()). The requirements are:
619 1. Subquery predicate is an IN/=ANY subquery predicate
620 2. Subquery is a single SELECT (not a UNION)
621 3. Subquery does not have GROUP BY
622 4. Subquery does not use aggregate functions or HAVING
623 5. Subquery predicate is at the AND-top-level of ON/WHERE clause
624 6. We are not in a subquery of a single table UPDATE/DELETE that
625 doesn't have a JOIN (TODO: We should handle this at some
626 point by switching to multi-table UPDATE/DELETE)
627 7. We're not in a confluent table-less subquery, like "SELECT 1".
628 8. No execution method was already chosen (by a prepared statement)
629 9. Parent select is not a confluent table-less select
630 10. Neither parent nor child select have STRAIGHT_JOIN option.
631 */
632 if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_SEMIJOIN) &&
633 in_predicate && // 1
634 !select_lex->is_part_of_union() && // 2
635 !select_lex->group_list.elements && // 3
636 !join->having && !select_lex->with_sum_func && // 4
637 (outer->resolve_place == st_select_lex::RESOLVE_CONDITION || // 5
638 outer->resolve_place == st_select_lex::RESOLVE_JOIN_NEST) && // 5
639 outer->join && // 6
640 select_lex->master_unit()->first_select()->leaf_tables && // 7
641 in_predicate->exec_method ==
642 Item_exists_subselect::EXEC_UNSPECIFIED && // 8
643 outer->leaf_tables && // 9
644 !((join->select_options | outer->join->select_options)
645 & SELECT_STRAIGHT_JOIN)) // 10
646 {
647 DBUG_PRINT("info", ("Subquery is semi-join conversion candidate"));
648
649 /* Notify in the subquery predicate where it belongs in the query graph */
650 in_predicate->embedding_join_nest= outer->resolve_nest;
651
652 /* Register the subquery for further processing in flatten_subqueries() */
653 outer->join->sj_subselects.push_back(in_predicate);
654 chose_semijoin= true;
655 }
656
657 if (in_predicate)
658 {
659 Opt_trace_context * const trace= &join->thd->opt_trace;
660 OPT_TRACE_TRANSFORM(trace, oto0, oto1,
661 select_lex->select_number, "IN (SELECT)", "semijoin");
662 oto1.add("chosen", chose_semijoin);
663 }
664
665 if (!chose_semijoin &&
666 subq_predicate->select_transformer(join) == Item_subselect::RES_ERROR)
667 DBUG_RETURN(TRUE);
668
669 DBUG_RETURN(FALSE);
670 }
671
672
673 /**
674 Fix fields referenced from inner selects.
675
676 @param thd Thread handle
677 @param all_fields List of all fields used in select
678 @param select Current select
679 @param ref_pointer_array Array of references to Items used in current select
680 @param group_list GROUP BY list (is NULL by default)
681
682 @details
683 The function serves 3 purposes
684
685 - adds fields referenced from inner query blocks to the current select list
686
687 - Decides which class to use to reference the items (Item_ref or
688 Item_direct_ref)
689
690 - fixes references (Item_ref objects) to these fields.
691
692 If a field isn't already on the select list and the ref_pointer_array
693 is provided then it is added to the all_fields list and the pointer to
694 it is saved in the ref_pointer_array.
695
696 The class to access the outer field is determined by the following rules:
697
698 -#. If the outer field isn't used under an aggregate function then the
699 Item_ref class should be used.
700
701 -#. If the outer field is used under an aggregate function and this
702 function is, in turn, aggregated in the query block where the outer
703 field was resolved or some query nested therein, then the
704 Item_direct_ref class should be used. Also it should be used if we are
705 grouping by a subquery containing the outer field.
706
707 The resolution is done here and not at the fix_fields() stage as
708 it can be done only after aggregate functions are fixed and pulled up to
709 selects where they are to be aggregated.
710
711 When the class is chosen it substitutes the original field in the
712 Item_outer_ref object.
713
714 After this we proceed with fixing references (Item_outer_ref objects) to
715 this field from inner subqueries.
716
717 @return Status
718 @retval true An error occured.
719 @retval false OK.
720 */
721
722 bool
fix_inner_refs(THD * thd,List<Item> & all_fields,SELECT_LEX * select,Ref_ptr_array ref_pointer_array,ORDER * group_list)723 fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select,
724 Ref_ptr_array ref_pointer_array, ORDER *group_list)
725 {
726 Item_outer_ref *ref;
727
728 List_iterator<Item_outer_ref> ref_it(select->inner_refs_list);
729 while ((ref= ref_it++))
730 {
731 bool direct_ref= false;
732 Item *item= ref->outer_ref;
733 Item **item_ref= ref->ref;
734 Item_ref *new_ref;
735 /*
736 TODO: this field item already might be present in the select list.
737 In this case instead of adding new field item we could use an
738 existing one. The change will lead to less operations for copying fields,
739 smaller temporary tables and less data passed through filesort.
740 */
741 if (!ref_pointer_array.is_null() && !ref->found_in_select_list)
742 {
743 int el= all_fields.elements;
744 ref_pointer_array[el]= item;
745 /* Add the field item to the select list of the current select. */
746 all_fields.push_front(item);
747 /*
748 If it's needed reset each Item_ref item that refers this field with
749 a new reference taken from ref_pointer_array.
750 */
751 item_ref= &ref_pointer_array[el];
752 }
753
754 if (ref->in_sum_func)
755 {
756 Item_sum *sum_func;
757 if (ref->in_sum_func->nest_level > select->nest_level)
758 direct_ref= TRUE;
759 else
760 {
761 for (sum_func= ref->in_sum_func; sum_func &&
762 sum_func->aggr_level >= select->nest_level;
763 sum_func= sum_func->in_sum_func)
764 {
765 if (sum_func->aggr_level == select->nest_level)
766 {
767 direct_ref= TRUE;
768 break;
769 }
770 }
771 }
772 }
773 else
774 {
775 /*
776 Check if GROUP BY item trees contain the outer ref:
777 in this case we have to use Item_direct_ref instead of Item_ref.
778 */
779 for (ORDER *group= group_list; group; group= group->next)
780 {
781 if ((*group->item)->walk(&Item::find_item_processor, TRUE,
782 (uchar *) ref))
783 {
784 direct_ref= TRUE;
785 break;
786 }
787 }
788 }
789 new_ref= direct_ref ?
790 new Item_direct_ref(ref->context, item_ref, ref->table_name,
791 ref->field_name, ref->alias_name_used) :
792 new Item_ref(ref->context, item_ref, ref->table_name,
793 ref->field_name, ref->alias_name_used);
794 if (!new_ref)
795 return TRUE;
796 ref->outer_ref= new_ref;
797 ref->ref= &ref->outer_ref;
798
799 if (!ref->fixed && ref->fix_fields(thd, 0))
800 return TRUE;
801 thd->lex->used_tables|= item->used_tables();
802 thd->lex->current_select->select_list_tables|= item->used_tables();
803 }
804 return false;
805 }
806
807
808 /**
809 Since LIMIT is not supported for table subquery predicates
810 (IN/ALL/EXISTS/etc), the following clauses are redundant for
811 subqueries:
812
813 ORDER BY
814 DISTINCT
815 GROUP BY if there are no aggregate functions and no HAVING
816 clause
817
818 Because redundant clauses are removed both from JOIN and
819 select_lex, the removal is permanent. Thus, it only makes sense to
820 call this function for normal queries and on first execution of
821 SP/PS
822
823 @param subq_select_lex select_lex that is part of a subquery
824 predicate. This object and the associated
825 join is modified.
826 @param hidden_group_field_count Number of hidden group fields added
827 by setup_group().
828 @param hidden_order_field_count Number of hidden order fields added
829 by setup_order().
830 @param fields Fields list from which to remove items.
831 @param ref_pointer_array Pointers to top level of all_fields.
832 */
833
834 static
remove_redundant_subquery_clauses(st_select_lex * subq_select_lex,int hidden_group_field_count,int hidden_order_field_count,List<Item> & fields,Ref_ptr_array ref_pointer_array)835 void remove_redundant_subquery_clauses(st_select_lex *subq_select_lex,
836 int hidden_group_field_count,
837 int hidden_order_field_count,
838 List<Item> &fields,
839 Ref_ptr_array ref_pointer_array)
840 {
841 Item_subselect *subq_predicate= subq_select_lex->master_unit()->item;
842 /*
843 The removal should happen for IN, ALL, ANY and EXISTS subqueries,
844 which means all but single row subqueries. Example single row
845 subqueries:
846 a) SELECT * FROM t1 WHERE t1.a = (<single row subquery>)
847 b) SELECT a, (<single row subquery) FROM t1
848 */
849 if (subq_predicate->substype() == Item_subselect::SINGLEROW_SUBS)
850 return;
851
852 // A subquery that is not single row should be one of IN/ALL/ANY/EXISTS.
853 DBUG_ASSERT (subq_predicate->substype() == Item_subselect::EXISTS_SUBS ||
854 subq_predicate->substype() == Item_subselect::IN_SUBS ||
855 subq_predicate->substype() == Item_subselect::ALL_SUBS ||
856 subq_predicate->substype() == Item_subselect::ANY_SUBS);
857
858 enum change
859 {
860 REMOVE_NONE=0,
861 REMOVE_ORDER= 1 << 0,
862 REMOVE_DISTINCT= 1 << 1,
863 REMOVE_GROUP= 1 << 2
864 };
865
866 uint changelog= 0;
867
868 if (subq_select_lex->order_list.elements)
869 {
870 changelog|= REMOVE_ORDER;
871 for (ORDER *o= subq_select_lex->order_list.first; o != NULL; o= o->next)
872 {
873 if (*o->item == o->item_ptr)
874 (*o->item)->walk(&Item::clean_up_after_removal, true,
875 static_cast<uchar*>(static_cast<void*>(subq_select_lex)));
876 }
877 subq_select_lex->join->order= NULL;
878 subq_select_lex->order_list.empty();
879 while (hidden_order_field_count-- > 0)
880 {
881 fields.pop();
882 ref_pointer_array[fields.elements]= NULL;
883 }
884 }
885
886 if (subq_select_lex->options & SELECT_DISTINCT)
887 {
888 changelog|= REMOVE_DISTINCT;
889 subq_select_lex->join->select_distinct= false;
890 subq_select_lex->options&= ~SELECT_DISTINCT;
891 }
892
893 /*
894 Remove GROUP BY if there are no aggregate functions and no HAVING
895 clause
896 */
897 if (subq_select_lex->group_list.elements &&
898 !subq_select_lex->with_sum_func && !subq_select_lex->join->having)
899 {
900 changelog|= REMOVE_GROUP;
901 for (ORDER *g= subq_select_lex->group_list.first; g != NULL; g= g->next)
902 {
903 if (*g->item == g->item_ptr)
904 (*g->item)->walk(&Item::clean_up_after_removal, true,
905 static_cast<uchar*>(static_cast<void*>(subq_select_lex)));
906 }
907 subq_select_lex->join->group_list= NULL;
908 subq_select_lex->group_list.empty();
909 while (hidden_group_field_count-- > 0)
910 {
911 fields.pop();
912 ref_pointer_array[fields.elements]= NULL;
913 }
914 }
915
916 if (changelog)
917 {
918 Opt_trace_context * trace= &subq_select_lex->join->thd->opt_trace;
919 if (unlikely(trace->is_started()))
920 {
921 Opt_trace_object trace_wrapper(trace);
922 Opt_trace_array trace_changes(trace, "transformations_to_subquery");
923 if (changelog & REMOVE_ORDER)
924 trace_changes.add_alnum("removed_ordering");
925 if (changelog & REMOVE_DISTINCT)
926 trace_changes.add_alnum("removed_distinct");
927 if (changelog & REMOVE_GROUP)
928 trace_changes.add_alnum("removed_grouping");
929 }
930 }
931 }
932
933
934 /**
935 Function to setup clauses without sum functions.
936 */
937 static inline int
setup_without_group(THD * thd,Ref_ptr_array ref_pointer_array,TABLE_LIST * tables,TABLE_LIST * leaves,List<Item> & fields,List<Item> & all_fields,Item ** conds,ORDER * order,ORDER * group,int * hidden_group_field_count,int * hidden_order_field_count)938 setup_without_group(THD *thd, Ref_ptr_array ref_pointer_array,
939 TABLE_LIST *tables,
940 TABLE_LIST *leaves,
941 List<Item> &fields,
942 List<Item> &all_fields,
943 Item **conds,
944 ORDER *order,
945 ORDER *group,
946 int *hidden_group_field_count,
947 int *hidden_order_field_count)
948 {
949 int res;
950 st_select_lex *const select= thd->lex->current_select;
951 nesting_map save_allow_sum_func=thd->lex->allow_sum_func;
952 /*
953 Need to save the value, so we can turn off only any new non_agg_field_used
954 additions coming from the WHERE
955 */
956 const bool saved_non_agg_field_used= select->non_agg_field_used();
957 DBUG_ENTER("setup_without_group");
958
959 thd->lex->allow_sum_func&= ~((nesting_map)1 << select->nest_level);
960 res= setup_conds(thd, tables, leaves, conds);
961
962 /* it's not wrong to have non-aggregated columns in a WHERE */
963 select->set_non_agg_field_used(saved_non_agg_field_used);
964
965 // GROUP BY
966 int all_fields_count= all_fields.elements;
967 res= res || setup_group(thd, ref_pointer_array, tables, fields, all_fields,
968 group);
969 *hidden_group_field_count= all_fields.elements - all_fields_count;
970
971 // ORDER BY
972 all_fields_count= all_fields.elements;
973 thd->lex->allow_sum_func|= (nesting_map)1 << select->nest_level;
974 res= res || setup_order(thd, ref_pointer_array, tables, fields, all_fields,
975 order);
976 *hidden_order_field_count= all_fields.elements - all_fields_count;
977
978 res= res || match_exprs_for_only_full_group_by(thd, all_fields,
979 *hidden_group_field_count,
980 *hidden_order_field_count,
981 fields.elements, group);
982
983 thd->lex->allow_sum_func= save_allow_sum_func;
984 DBUG_RETURN(res);
985 }
986
987
988 /*****************************************************************************
989 Group and order functions
990 *****************************************************************************/
991
992 /**
993 Resolve an ORDER BY or GROUP BY column reference.
994
995 Given a column reference (represented by 'order') from a GROUP BY or ORDER
996 BY clause, find the actual column it represents. If the column being
997 resolved is from the GROUP BY clause, the procedure searches the SELECT
998 list 'fields' and the columns in the FROM list 'tables'. If 'order' is from
999 the ORDER BY clause, only the SELECT list is being searched.
1000
1001 If 'order' is resolved to an Item, then order->item is set to the found
1002 Item. If there is no item for the found column (that is, it was resolved
1003 into a table field), order->item is 'fixed' and is added to all_fields and
1004 ref_pointer_array.
1005
1006 ref_pointer_array and all_fields are updated.
1007
1008 @param[in] thd Pointer to current thread structure
1009 @param[in,out] ref_pointer_array All select, group and order by fields
1010 @param[in] tables List of tables to search in (usually
1011 FROM clause)
1012 @param[in] order Column reference to be resolved
1013 @param[in] fields List of fields to search in (usually
1014 SELECT list)
1015 @param[in,out] all_fields All select, group and order by fields
1016 @param[in] is_group_field True if order is a GROUP field, false if
1017 ORDER by field
1018
1019 @retval
1020 FALSE if OK
1021 @retval
1022 TRUE if error occurred
1023 */
1024
1025 static bool
find_order_in_list(THD * thd,Ref_ptr_array ref_pointer_array,TABLE_LIST * tables,ORDER * order,List<Item> & fields,List<Item> & all_fields,bool is_group_field)1026 find_order_in_list(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables,
1027 ORDER *order, List<Item> &fields, List<Item> &all_fields,
1028 bool is_group_field)
1029 {
1030 Item *order_item= *order->item; /* The item from the GROUP/ORDER caluse. */
1031 Item::Type order_item_type;
1032 Item **select_item; /* The corresponding item from the SELECT clause. */
1033 Field *from_field; /* The corresponding field from the FROM clause. */
1034 uint counter;
1035 enum_resolution_type resolution;
1036
1037 /*
1038 Local SP variables may be int but are expressions, not positions.
1039 (And they can't be used before fix_fields is called for them).
1040 */
1041 if (order_item->type() == Item::INT_ITEM && order_item->basic_const_item())
1042 { /* Order by position */
1043 uint count= (uint) order_item->val_int();
1044 if (!count || count > fields.elements)
1045 {
1046 my_error(ER_BAD_FIELD_ERROR, MYF(0),
1047 order_item->full_name(), thd->where);
1048 return TRUE;
1049 }
1050 order->item= &ref_pointer_array[count - 1];
1051 order->in_field_list= 1;
1052 order->counter= count;
1053 order->counter_used= 1;
1054 return FALSE;
1055 }
1056 /* Lookup the current GROUP/ORDER field in the SELECT clause. */
1057 select_item= find_item_in_list(order_item, fields, &counter,
1058 REPORT_EXCEPT_NOT_FOUND, &resolution);
1059 if (!select_item)
1060 return TRUE; /* The item is not unique, or some other error occured. */
1061
1062
1063 /* Check whether the resolved field is not ambiguos. */
1064 if (select_item != not_found_item)
1065 {
1066 Item *view_ref= NULL;
1067 /*
1068 If we have found field not by its alias in select list but by its
1069 original field name, we should additionaly check if we have conflict
1070 for this name (in case if we would perform lookup in all tables).
1071 */
1072 if (resolution == RESOLVED_BEHIND_ALIAS && !order_item->fixed &&
1073 order_item->fix_fields(thd, order->item))
1074 return TRUE;
1075
1076 /* Lookup the current GROUP field in the FROM clause. */
1077 order_item_type= order_item->type();
1078 from_field= (Field*) not_found_field;
1079 if ((is_group_field &&
1080 order_item_type == Item::FIELD_ITEM) ||
1081 order_item_type == Item::REF_ITEM)
1082 {
1083 from_field= find_field_in_tables(thd, (Item_ident*) order_item, tables,
1084 NULL, &view_ref, IGNORE_ERRORS, TRUE,
1085 FALSE);
1086 if (!from_field)
1087 from_field= (Field*) not_found_field;
1088 }
1089
1090 if (from_field == not_found_field ||
1091 (from_field != view_ref_found ?
1092 /* it is field of base table => check that fields are same */
1093 ((*select_item)->type() == Item::FIELD_ITEM &&
1094 ((Item_field*) (*select_item))->field->eq(from_field)) :
1095 /*
1096 in is field of view table => check that references on translation
1097 table are same
1098 */
1099 ((*select_item)->type() == Item::REF_ITEM &&
1100 view_ref->type() == Item::REF_ITEM &&
1101 ((Item_ref *) (*select_item))->ref ==
1102 ((Item_ref *) view_ref)->ref)))
1103 {
1104 /*
1105 If there is no such field in the FROM clause, or it is the same field
1106 as the one found in the SELECT clause, then use the Item created for
1107 the SELECT field. As a result if there was a derived field that
1108 'shadowed' a table field with the same name, the table field will be
1109 chosen over the derived field.
1110
1111 If we replace *order->item with one from the select list or
1112 from a table in the FROM list, we should clean up after
1113 removing the old *order->item from the query. The item has not
1114 been fixed (so there are no aggregation functions that need
1115 cleaning up), but it may contain subqueries that should be
1116 unlinked.
1117 */
1118 if (*order->item != *select_item)
1119 (*order->item)->walk(&Item::clean_up_after_removal, true, NULL);
1120 order->item= &ref_pointer_array[counter];
1121 order->in_field_list=1;
1122 if (resolution == RESOLVED_AGAINST_ALIAS)
1123 order->used_alias= true;
1124 return FALSE;
1125 }
1126 else
1127 {
1128 /*
1129 There is a field with the same name in the FROM clause. This
1130 is the field that will be chosen. In this case we issue a
1131 warning so the user knows that the field from the FROM clause
1132 overshadows the column reference from the SELECT list.
1133 */
1134 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, ER_NON_UNIQ_ERROR,
1135 ER(ER_NON_UNIQ_ERROR),
1136 ((Item_ident*) order_item)->field_name,
1137 current_thd->where);
1138 }
1139 }
1140
1141 order->in_field_list=0;
1142 /*
1143 The call to order_item->fix_fields() means that here we resolve
1144 'order_item' to a column from a table in the list 'tables', or to
1145 a column in some outer query. Exactly because of the second case
1146 we come to this point even if (select_item == not_found_item),
1147 inspite of that fix_fields() calls find_item_in_list() one more
1148 time.
1149
1150 We check order_item->fixed because Item_func_group_concat can put
1151 arguments for which fix_fields already was called.
1152
1153 group_fix_field= TRUE is to resolve aliases from the SELECT list
1154 without creating of Item_ref-s: JOIN::exec() wraps aliased items
1155 in SELECT list with Item_copy items. To re-evaluate such a tree
1156 that includes Item_copy items we have to refresh Item_copy caches,
1157 but:
1158 - filesort() never refresh Item_copy items,
1159 - end_send_group() checks every record for group boundary by the
1160 test_if_group_changed function that obtain data from these
1161 Item_copy items, but the copy_fields function that
1162 refreshes Item copy items is called after group boundaries only -
1163 that is a vicious circle.
1164 So we prevent inclusion of Item_copy items.
1165 */
1166 bool save_group_fix_field= thd->lex->current_select->group_fix_field;
1167 if (is_group_field)
1168 thd->lex->current_select->group_fix_field= TRUE;
1169 bool ret= (!order_item->fixed &&
1170 (order_item->fix_fields(thd, order->item) ||
1171 (order_item= *order->item)->check_cols(1) ||
1172 thd->is_fatal_error));
1173 thd->lex->current_select->group_fix_field= save_group_fix_field;
1174 if (ret)
1175 return TRUE; /* Wrong field. */
1176
1177 uint el= all_fields.elements;
1178 all_fields.push_front(order_item); /* Add new field to field list. */
1179 ref_pointer_array[el]= order_item;
1180 /*
1181 If the order_item is a SUM_FUNC_ITEM, when fix_fields is called
1182 ref_by is set to order->item which is the address of order_item.
1183 But this needs to be address of order_item in the all_fields list.
1184 As a result, when it gets replaced with Item_aggregate_ref
1185 object in Item::split_sum_func2, we will be able to retrieve the
1186 newly created object.
1187 */
1188 if (order_item->type() == Item::SUM_FUNC_ITEM)
1189 ((Item_sum *)order_item)->ref_by= all_fields.head_ref();
1190
1191 /*
1192 Currently, we assume that this assertion holds. If it turns out
1193 that it fails for some query, order->item has changed and the old
1194 item is removed from the query. In that case, we must call walk()
1195 with clean_up_after_removal() on the old order->item.
1196 */
1197 DBUG_ASSERT(order_item == *order->item);
1198 order->item= &ref_pointer_array[el];
1199 return FALSE;
1200 }
1201
1202
1203 /**
1204 Change order to point at item in select list.
1205
1206 If item isn't a number and doesn't exists in the select list, add it to the
1207 the field list.
1208 */
1209
setup_order(THD * thd,Ref_ptr_array ref_pointer_array,TABLE_LIST * tables,List<Item> & fields,List<Item> & all_fields,ORDER * order)1210 int setup_order(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables,
1211 List<Item> &fields, List<Item> &all_fields, ORDER *order)
1212 {
1213 thd->where="order clause";
1214 DBUG_ASSERT(thd->lex->current_select->cur_pos_in_all_fields ==
1215 SELECT_LEX::ALL_FIELDS_UNDEF_POS);
1216 for (; order; order=order->next)
1217 {
1218 thd->lex->current_select->cur_pos_in_all_fields=
1219 fields.elements - all_fields.elements - 1;
1220 if (find_order_in_list(thd, ref_pointer_array, tables, order, fields,
1221 all_fields, FALSE))
1222 return 1;
1223 }
1224 thd->lex->current_select->cur_pos_in_all_fields=
1225 SELECT_LEX::ALL_FIELDS_UNDEF_POS;
1226 return 0;
1227 }
1228
1229
1230 /**
1231 Scans the SELECT list and ORDER BY list: for each expression, if it is not
1232 present in GROUP BY, examines the non-aggregated columns contained in the
1233 expression; if those columns are not all in GROUP BY, raise an error.
1234
1235 Examples:
1236 1) "SELECT a+1 FROM t GROUP BY a+1"
1237 "a+1" in SELECT list was found, by setup_group() (exactly
1238 find_order_in_list()), to be the same as "a+1" in GROUP BY; as it is a
1239 GROUP BY expression, setup_group() has marked this expression with
1240 ALL_FIELDS_UNDEF_POS (item->marker= ALL_FIELDS_UNDEF_POS).
1241 2) "SELECT a+1 FROM t GROUP BY a"
1242 "a+1" is not found in GROUP BY; its non-aggregated column is "a", "a" is
1243 present in GROUP BY so it's ok.
1244
1245 A "hidden" GROUP BY / ORDER BY expression is a member of GROUP BY / ORDER
1246 BY which was not found (by setup_order() or setup_group()) to be also
1247 present in the SELECT list. setup_order() and setup_group() have thus added
1248 the expression to the front of JOIN::all_fields.
1249
1250 @param thd THD pointer
1251 @param all_fields list of expressions, including SELECT list
1252 and hidden ORDER BY expressions
1253 @param hidden_group_exprs_count number of hidden GROUP BY expressions
1254 @param hidden_order_exprs_count number of hidden ORDER BY expressions
1255 @param select_exprs_count number of SELECT list expressions (there may
1256 be a gap between hidden GROUP BY expressions
1257 and SELECT list expressions)
1258 @param group_exprs GROUP BY expressions
1259
1260 @returns true if ONLY_FULL_GROUP_BY is violated.
1261 */
1262
1263 static bool
match_exprs_for_only_full_group_by(THD * thd,List<Item> & all_fields,int hidden_group_exprs_count,int hidden_order_exprs_count,int select_exprs_count,ORDER * group_exprs)1264 match_exprs_for_only_full_group_by(THD *thd, List<Item> &all_fields,
1265 int hidden_group_exprs_count,
1266 int hidden_order_exprs_count,
1267 int select_exprs_count,
1268 ORDER *group_exprs)
1269 {
1270 if (!group_exprs ||
1271 !(thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY))
1272 return false;
1273
1274 /*
1275 For all expressions of the SELECT list and ORDER BY, a list of columns
1276 which aren't under an aggregate function, 'select_lex->non_agg_fields',
1277 has been created (see Item_field::fix_fields()). Each column in that list
1278 keeps in Item::marker the position, in all_fields, of the (SELECT
1279 list or ORDER BY) expression which it belongs to (see
1280 st_select_lex::cur_pos_in_all_fields). all_fields looks like this:
1281 (front) HIDDEN ORDER BY - HIDDEN GROUP BY - gap - SELECT LIST (back)
1282 "Gap" may contain some aggregate expressions (see Item::split_sum_func2())
1283 which are irrelevant to us.
1284
1285 We take an expressions of the SELECT list or a hidden ORDER BY expression
1286 ('expr' variable).
1287 - (1) If it also belongs to the GROUP BY list, it's ok.
1288 - (2) If it is an aggregate function, it's ok.
1289 - (3) If is is a constant, it's ok.
1290 - (4) If it is a column resolved to an outer SELECT it's ok;
1291 indeed, it is a constant from the point of view of one execution of the
1292 inner SELECT - it does not introduce any randomness in the result.
1293 - Otherwise we scan the list of non-aggregated columns and if we find at
1294 least one column belonging to this expression and NOT occuring
1295 in the GROUP BY list, we throw an error.
1296 */
1297 List_iterator<Item> exprs_it(all_fields);
1298 /*
1299 All "idx*" variables below are indices in all_fields, with "index of
1300 front" = 0 and "index of back" = all_fields.elements - 1.
1301 */
1302 int idx= -1;
1303 const int idx_of_first_hidden_group= hidden_order_exprs_count;
1304 const int idx_of_first_select= all_fields.elements - select_exprs_count;
1305 /*
1306 Also an index in all_fields, but with the same counting convention as
1307 st_select_lex::cur_pos_in_all_fields.
1308 */
1309 int cur_pos_in_all_fields;
1310 Item *expr;
1311 Item_field *non_agg_field;
1312 List_iterator<Item_field>
1313 non_agg_fields_it(thd->lex->current_select->non_agg_fields);
1314
1315 non_agg_field= non_agg_fields_it++;
1316 while (non_agg_field && (expr= exprs_it++))
1317 {
1318 idx++;
1319 if (idx >= idx_of_first_hidden_group && // In or after hidden GROUP BY
1320 idx < idx_of_first_select) // but not yet in SELECT list
1321 continue;
1322 cur_pos_in_all_fields= idx - idx_of_first_select;
1323
1324 if ((expr->marker == SELECT_LEX::ALL_FIELDS_UNDEF_POS) || // (1)
1325 expr->type() == Item::SUM_FUNC_ITEM || // (2)
1326 expr->const_item() || // (3)
1327 (expr->real_item()->type() == Item::FIELD_ITEM &&
1328 expr->used_tables() & OUTER_REF_TABLE_BIT)) // (4)
1329 continue; // Ignore this expression.
1330
1331 while (non_agg_field)
1332 {
1333 /*
1334 All non-aggregated columns contained in 'expr' have their
1335 'marker' equal to 'cur_pos_in_all_fields' OR equal to
1336 ALL_FIELDS_UNDEF_POS. The latter case happens in:
1337 "SELECT a FROM t GROUP BY a"
1338 when setup_group() finds that "a" in GROUP BY is also in the
1339 SELECT list ('fields' list); setup_group() marks the "a" expression
1340 with ALL_FIELDS_UNDEF_POS; at the same time, "a" is also a
1341 non-aggregated column of the "a" expression; thus, non-aggregated
1342 column "a" had its marker change from >=0 to
1343 ALL_FIELDS_UNDEF_POS. Such non-aggregated column can be ignored (and
1344 that is why ALL_FIELDS_UNDEF_POS is a very negative number).
1345 */
1346 if (non_agg_field->marker < cur_pos_in_all_fields)
1347 {
1348 /*
1349 Ignorable column, or the owning expression was found to be
1350 ignorable (cases 1-2-3-4 above); ignore it and switch to next
1351 column.
1352 */
1353 goto next_non_agg_field;
1354 }
1355 if (non_agg_field->marker > cur_pos_in_all_fields)
1356 {
1357 /*
1358 'expr' has been passed (we have scanned all its non-aggregated
1359 columns and are seeing one which belongs to a next expression),
1360 switch to next expression.
1361 */
1362 break;
1363 }
1364 // Check whether the non-aggregated column occurs in the GROUP BY list
1365 for (ORDER *grp= group_exprs; grp; grp= grp->next)
1366 if ((*grp->item)->eq(static_cast<Item *>(non_agg_field), false))
1367 {
1368 // column is in GROUP BY so is ok; check the next
1369 goto next_non_agg_field;
1370 }
1371 /*
1372 If we come here, one non-aggregated column belonging to 'expr' was
1373 not found in GROUP BY, we raise an error.
1374 TODO: change ER_WRONG_FIELD_WITH_GROUP to more detailed
1375 ER_NON_GROUPING_FIELD_USED
1376 */
1377 my_error(ER_WRONG_FIELD_WITH_GROUP, MYF(0), non_agg_field->full_name());
1378 return true;
1379 next_non_agg_field:
1380 non_agg_field= non_agg_fields_it++;
1381 }
1382 }
1383 return false;
1384 }
1385
1386
1387 /**
1388 Initialize the GROUP BY list.
1389
1390 @param thd Thread handler
1391 @param ref_pointer_array We store references to all fields that was
1392 not in 'fields' here.
1393 @param fields All fields in the select part. Any item in
1394 'order' that is part of these list is replaced
1395 by a pointer to this fields.
1396 @param all_fields Total list of all unique fields used by the
1397 select. All items in 'order' that was not part
1398 of fields will be added first to this list.
1399 @param order The fields we should do GROUP BY on.
1400
1401 @todo
1402 change ER_WRONG_FIELD_WITH_GROUP to more detailed
1403 ER_NON_GROUPING_FIELD_USED
1404
1405 @retval
1406 0 ok
1407 @retval
1408 1 error (probably out of memory)
1409 */
1410
1411 static int
setup_group(THD * thd,Ref_ptr_array ref_pointer_array,TABLE_LIST * tables,List<Item> & fields,List<Item> & all_fields,ORDER * order)1412 setup_group(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables,
1413 List<Item> &fields, List<Item> &all_fields, ORDER *order)
1414 {
1415 if (!order)
1416 return 0; /* Everything is ok */
1417
1418 thd->where="group statement";
1419 for (ORDER *ord= order; ord; ord= ord->next)
1420 {
1421 if (find_order_in_list(thd, ref_pointer_array, tables, ord, fields,
1422 all_fields, TRUE))
1423 return 1;
1424 // ONLY_FULL_GROUP_BY needn't verify this expression:
1425 (*ord->item)->marker= SELECT_LEX::ALL_FIELDS_UNDEF_POS;
1426 if ((*ord->item)->with_sum_func)
1427 {
1428 my_error(ER_WRONG_GROUP_FIELD, MYF(0), (*ord->item)->full_name());
1429 return 1;
1430 }
1431 }
1432 return 0;
1433 }
1434
1435
1436 /****************************************************************************
1437 ROLLUP handling
1438 ****************************************************************************/
1439
1440 /**
1441 Replace occurences of group by fields in an expression by ref items.
1442
1443 The function replaces occurrences of group by fields in expr
1444 by ref objects for these fields unless they are under aggregate
1445 functions.
1446 The function also corrects value of the the maybe_null attribute
1447 for the items of all subexpressions containing group by fields.
1448
1449 @b EXAMPLES
1450 @code
1451 SELECT a+1 FROM t1 GROUP BY a WITH ROLLUP
1452 SELECT SUM(a)+a FROM t1 GROUP BY a WITH ROLLUP
1453 @endcode
1454
1455 @b IMPLEMENTATION
1456
1457 The function recursively traverses the tree of the expr expression,
1458 looks for occurrences of the group by fields that are not under
1459 aggregate functions and replaces them for the corresponding ref items.
1460
1461 @note
1462 This substitution is needed GROUP BY queries with ROLLUP if
1463 SELECT list contains expressions over group by attributes.
1464
1465 @param thd reference to the context
1466 @param expr expression to make replacement
1467 @param group_list list of references to group by items
1468 @param changed out: returns 1 if item contains a replaced field item
1469
1470 @todo
1471 - TODO: Some functions are not null-preserving. For those functions
1472 updating of the maybe_null attribute is an overkill.
1473
1474 @retval
1475 0 if ok
1476 @retval
1477 1 on error
1478 */
1479
change_group_ref(THD * thd,Item_func * expr,ORDER * group_list,bool * changed)1480 static bool change_group_ref(THD *thd, Item_func *expr, ORDER *group_list,
1481 bool *changed)
1482 {
1483 if (expr->arg_count)
1484 {
1485 Name_resolution_context *context= &thd->lex->current_select->context;
1486 Item **arg,**arg_end;
1487 bool arg_changed= FALSE;
1488 for (arg= expr->arguments(),
1489 arg_end= expr->arguments()+expr->arg_count;
1490 arg != arg_end; arg++)
1491 {
1492 Item *item= *arg;
1493 if (item->type() == Item::FIELD_ITEM || item->type() == Item::REF_ITEM)
1494 {
1495 ORDER *group_tmp;
1496 for (group_tmp= group_list; group_tmp; group_tmp= group_tmp->next)
1497 {
1498 if (item->eq(*group_tmp->item,0))
1499 {
1500 Item *new_item;
1501 if (!(new_item= new Item_ref(context, group_tmp->item, 0,
1502 item->item_name.ptr())))
1503 return 1; // fatal_error is set
1504 thd->change_item_tree(arg, new_item);
1505 arg_changed= TRUE;
1506 }
1507 }
1508 }
1509 else if (item->type() == Item::FUNC_ITEM)
1510 {
1511 if (change_group_ref(thd, (Item_func *) item, group_list, &arg_changed))
1512 return 1;
1513 }
1514 }
1515 if (arg_changed)
1516 {
1517 expr->maybe_null= 1;
1518 *changed= TRUE;
1519 }
1520 }
1521 return 0;
1522 }
1523
1524
1525 /** Allocate memory needed for other rollup functions. */
1526
rollup_init()1527 bool JOIN::rollup_init()
1528 {
1529 uint i,j;
1530 Item **ref_array;
1531 ORDER *group_tmp;
1532
1533 tmp_table_param.quick_group= 0; // Can't create groups in tmp table
1534 rollup.state= ROLLUP::STATE_INITED;
1535
1536 /*
1537 Create pointers to the different sum function groups
1538 These are updated by rollup_make_fields()
1539 */
1540 tmp_table_param.group_parts= send_group_parts;
1541
1542 Item_null_result **null_items=
1543 static_cast<Item_null_result**>(thd->alloc(sizeof(Item*)*send_group_parts));
1544
1545 rollup.null_items= Item_null_array(null_items, send_group_parts);
1546 rollup.ref_pointer_arrays=
1547 static_cast<Ref_ptr_array*>
1548 (thd->alloc((sizeof(Ref_ptr_array) +
1549 all_fields.elements * sizeof(Item*)) * send_group_parts));
1550 rollup.fields=
1551 static_cast<List<Item>*>(thd->alloc(sizeof(List<Item>) * send_group_parts));
1552
1553 if (!null_items || !rollup.ref_pointer_arrays || !rollup.fields)
1554 return true;
1555
1556 ref_array= (Item**) (rollup.ref_pointer_arrays+send_group_parts);
1557
1558 /*
1559 Prepare space for field list for the different levels
1560 These will be filled up in rollup_make_fields()
1561 */
1562 group_tmp= group_list;
1563 for (i= 0 ; i < send_group_parts ; i++)
1564 {
1565 rollup.null_items[i]=
1566 new (thd->mem_root) Item_null_result((*group_tmp->item)->field_type(),
1567 (*group_tmp->item)->result_type());
1568 List<Item> *rollup_fields= &rollup.fields[i];
1569 rollup_fields->empty();
1570 rollup.ref_pointer_arrays[i]= Ref_ptr_array(ref_array, all_fields.elements);
1571 ref_array+= all_fields.elements;
1572 group_tmp= group_tmp->next;
1573 }
1574 for (i= 0 ; i < send_group_parts; i++)
1575 {
1576 for (j=0 ; j < fields_list.elements ; j++)
1577 rollup.fields[i].push_back(rollup.null_items[i]);
1578 }
1579 List_iterator<Item> it(all_fields);
1580 Item *item;
1581 while ((item= it++))
1582 {
1583 bool found_in_group= 0;
1584
1585 for (group_tmp= group_list; group_tmp; group_tmp= group_tmp->next)
1586 {
1587 if (*group_tmp->item == item)
1588 {
1589 item->maybe_null= 1;
1590 found_in_group= 1;
1591 break;
1592 }
1593 }
1594 if (item->type() == Item::FUNC_ITEM && !found_in_group)
1595 {
1596 bool changed= FALSE;
1597 if (change_group_ref(thd, (Item_func *) item, group_list, &changed))
1598 return 1;
1599 /*
1600 We have to prevent creation of a field in a temporary table for
1601 an expression that contains GROUP BY attributes.
1602 Marking the expression item as 'with_sum_func' will ensure this.
1603 */
1604 if (changed)
1605 item->with_sum_func= 1;
1606 }
1607 }
1608 return 0;
1609 }
1610
1611 /**
1612 @} (end of group Query_Resolver)
1613 */
1614