1 /* Copyright (c) 2000, 2021, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23 /**
24 @file
25
26 @brief
27 Query execution
28
29
30 @defgroup Query_Executor Query Executor
31 @{
32 */
33
34 #include "sql_executor.h"
35
36 #include "debug_sync.h" // DEBUG_SYNC
37 #include "item_sum.h" // Item_sum
38 #include "key.h" // key_cmp
39 #include "log.h" // sql_print_error
40 #include "opt_trace.h" // Opt_trace_object
41 #include "sql_base.h" // fill_record
42 #include "sql_join_buffer.h" // st_cache_field
43 #include "sql_optimizer.h" // JOIN
44 #include "sql_show.h" // get_schema_tables_result
45 #include "sql_tmp_table.h" // create_tmp_table
46 #include "json_dom.h" // Json_wrapper
47
48 #include <algorithm>
49 using std::max;
50 using std::min;
51
/* Result-set production and const-table NULL-row bookkeeping. */
static void return_zero_rows(JOIN *join, List<Item> &fields);
static void save_const_null_info(JOIN *join, table_map *save_nullinfo);
static void restore_const_null_info(JOIN *join, table_map save_nullinfo);
static int do_select(JOIN *join);

/* Nested-loop join: per-record evaluation and end-of-rows handlers. */
static enum_nested_loop_state
evaluate_join_record(JOIN *join, QEP_TAB *qep_tab);
static enum_nested_loop_state
evaluate_null_complemented_join_record(JOIN *join, QEP_TAB *qep_tab);
static enum_nested_loop_state
end_send(JOIN *join, QEP_TAB *qep_tab, bool end_of_records);
static enum_nested_loop_state
end_write(JOIN *join, QEP_TAB *qep_tab, bool end_of_records);
static enum_nested_loop_state
end_update(JOIN *join, QEP_TAB *qep_tab, bool end_of_records);
static void copy_sum_funcs(Item_sum **func_ptr, Item_sum **end_ptr);

/* Row-reading strategies for the various table access methods. */
static int read_system(TABLE *table);
static int join_read_const(QEP_TAB *tab);
static int read_const(TABLE *table, TABLE_REF *ref);
static int join_read_key(QEP_TAB *tab);
static int join_read_always_key(QEP_TAB *tab);
static int join_no_more_records(READ_RECORD *info);
static int join_read_next(READ_RECORD *info);
static int join_read_next_same(READ_RECORD *info);
static int join_read_prev(READ_RECORD *info);
static int join_ft_read_first(QEP_TAB *tab);
static int join_ft_read_next(READ_RECORD *info);
static int join_read_always_key_or_null(QEP_TAB *tab);
static int join_read_next_same_or_null(READ_RECORD *info);

/* Sorting, duplicate elimination and semi-join helpers. */
static int create_sort_index(THD *thd, JOIN *join, QEP_TAB *tab);
static bool remove_dup_with_compare(THD *thd, TABLE *entry, Field **field,
                                    ulong offset,Item *having);
static bool remove_dup_with_hash_index(THD *thd,TABLE *table,
                                       uint field_count, Field **first_field,
                                       ulong key_length,Item *having);
static int join_read_linked_first(QEP_TAB *tab);
static int join_read_linked_next(READ_RECORD *info);
static int do_sj_reset(SJ_TMP_TABLE *sj_tbl);
static bool cmp_buffer_with_ref(THD *thd, TABLE *table, TABLE_REF *tab_ref);
92
93 /**
94 Execute select, executor entry point.
95
96 @todo
97 When can we have here thd->net.report_error not zero?
98
99 @note that EXPLAIN may come here (single-row derived table, uncorrelated
100 scalar subquery in WHERE clause...).
101 */
102
void
JOIN::exec()
{
  // Record this execution phase in the optimizer trace.
  Opt_trace_context * const trace= &thd->opt_trace;
  Opt_trace_object trace_wrapper(trace);
  Opt_trace_object trace_exec(trace, "join_execution");
  trace_exec.add_select_number(select_lex->select_number);
  Opt_trace_array trace_steps(trace, "steps");
  List<Item> *columns_list= &fields_list;
  DBUG_ENTER("JOIN::exec");

  assert(select_lex == thd->lex->current_select());

  /*
    Check that we either
    - have no tables, or
    - have tables and have locked them, or
    - called for fake_select_lex, which may have temporary tables which do
      not need locking up front.
  */
  assert(!tables || thd->lex->is_query_tables_locked() ||
         select_lex == unit->fake_select_lex);

  THD_STAGE_INFO(thd, stage_executing);
  DEBUG_SYNC(thd, "before_join_exec");

  set_executed();

  if (prepare_result())
    DBUG_VOID_RETURN;

  Query_result *const query_result= select_lex->query_result();

  // LIMIT 0 means no rows should be sent to the client.
  do_send_rows = unit->select_limit_cnt > 0;

  if (!tables_list && (tables || !select_lex->with_sum_func))
  {                                           // Only test of functions
    /*
      We have to test for 'conds' here as the WHERE may not be constant
      even if we don't have any tables for prepared statements or if
      conds uses something like 'rand()'.

      Don't evaluate the having clause here. return_zero_rows() should
      be called only for cases where there are no matching rows after
      evaluating all conditions except the HAVING clause.
    */
    if (select_lex->cond_value != Item::COND_FALSE &&
        (!where_cond || where_cond->val_int()))
    {
      if (query_result->send_result_set_metadata(*columns_list,
                                                 Protocol::SEND_NUM_ROWS |
                                                 Protocol::SEND_EOF))
        DBUG_VOID_RETURN;

      /*
        If the HAVING clause is either impossible or always true, then
        JOIN::having is set to NULL by optimize_cond.
        In this case JOIN::exec must check for JOIN::having_value, in the
        same way it checks for JOIN::cond_value.
      */
      if (((select_lex->having_value != Item::COND_FALSE) &&
           (!having_cond || having_cond->val_int()))
          && do_send_rows && query_result->send_data(fields_list))
        error= 1;
      else
      {
        error= (int) query_result->send_eof();
        // With SQL_CALC_FOUND_ROWS the single conceptual row always counts.
        send_records= calc_found_rows ? 1 : thd->get_sent_row_count();
      }
      /* Query block (without union) always returns 0 or 1 row */
      thd->current_found_rows= send_records;
    }
    else
    {
      // WHERE is false: produce an empty result (or aggregate-only row).
      return_zero_rows(this, *columns_list);
    }
    DBUG_VOID_RETURN;
  }

  // The optimizer proved the result is empty (e.g. impossible WHERE).
  if (zero_result_cause)
  {
    return_zero_rows(this, *columns_list);
    DBUG_VOID_RETURN;
  }

  /*
    Initialize examined rows here because the values from all join parts
    must be accumulated in examined_row_count. Hence every join
    iteration must count from zero.
  */
  examined_rows= 0;

  /* XXX: When can we have here thd->is_error() not zero? */
  if (thd->is_error())
  {
    error= thd->is_error();
    DBUG_VOID_RETURN;
  }

  THD_STAGE_INFO(thd, stage_sending_data);
  DBUG_PRINT("info", ("%s", thd->proc_info));
  query_result->send_result_set_metadata(*fields,
                                         Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF);
  // Run the nested-loop join and send/store the rows.
  error= do_select(this);
  /* Accumulate the counts from all join iterations of all join parts. */
  thd->inc_examined_row_count(examined_rows);
  DBUG_PRINT("counts", ("thd->examined_row_count: %lu",
                        (ulong) thd->get_examined_row_count()));

  DBUG_VOID_RETURN;
}
214
215
/**
  Create a temporary table for an intermediate step of query execution
  (materialization of GROUP BY / DISTINCT / ORDER BY results) and attach
  it to the given QEP_TAB.

  @param tab              QEP_TAB the tmp table is attached to
  @param tmp_table_fields fields to store in the table
  @param tmp_table_group  grouping definition for the table (may be empty)
  @param save_sum_fields  true if aggregate values should be stored as fields

  @returns false on success, true on error (tmp table is freed).
*/
bool
JOIN::create_intermediate_table(QEP_TAB *const tab,
                                List<Item> *tmp_table_fields,
                                ORDER_with_src &tmp_table_group,
                                bool save_sum_fields)
{
  DBUG_ENTER("JOIN::create_intermediate_table");
  THD_STAGE_INFO(thd, stage_creating_tmp_table);

  /*
    Pushing LIMIT to the temporary table creation is not applicable
    when there is ORDER BY or GROUP BY or there is no GROUP BY, but
    there are aggregate functions, because in all these cases we need
    all result rows.
  */
  ha_rows tmp_rows_limit= ((order == NULL || skip_sort_order) &&
                           !tmp_table_group &&
                           !select_lex->with_sum_func) ?
    m_select_limit : HA_POS_ERROR;

  // Per-table copy of the shared parameters; physical creation is deferred.
  tab->tmp_table_param= new (thd->mem_root) Temp_table_param(tmp_table_param);
  tab->tmp_table_param->skip_create_table= true;
  TABLE* table= create_tmp_table(thd, tab->tmp_table_param, *tmp_table_fields,
                                 tmp_table_group, select_distinct && !group_list,
                                 save_sum_fields, select_lex->active_options(),
                                 tmp_rows_limit, "");
  if (!table)
    DBUG_RETURN(true);
  tmp_table_param.using_outer_summary_function=
    tab->tmp_table_param->using_outer_summary_function;

  assert(tab->idx() > 0);
  // Route rows produced by the preceding table into this tmp-table operation.
  tab[-1].next_select= sub_select_op;
  if (!(tab->op= new (thd->mem_root) QEP_tmp_table(tab)))
    goto err;

  tab->set_table(table);

  /* Record in EXPLAIN output why a temporary table is used. */
  if (table->group)
  {
    explain_flags.set(tmp_table_group.src, ESP_USING_TMPTABLE);
  }
  if (table->distinct || select_distinct)
  {
    explain_flags.set(ESC_DISTINCT, ESP_USING_TMPTABLE);
  }
  if ((!group_list && !order && !select_distinct) ||
      (select_lex->active_options() &
       (SELECT_BIG_RESULT | OPTION_BUFFER_RESULT)))
  {
    explain_flags.set(ESC_BUFFER_RESULT, ESP_USING_TMPTABLE);
  }
  /* if group or order on first table, sort first */
  if (group_list && simple_group)
  {
    DBUG_PRINT("info",("Sorting for group"));
    THD_STAGE_INFO(thd, stage_sorting_for_group);

    if (ordered_index_usage != ordered_index_group_by &&
        qep_tab[const_tables].type() != JT_CONST && // Don't sort 1 row
        add_sorting_to_table(const_tables, &group_list))
      goto err;

    if (alloc_group_fields(this, group_list))
      goto err;
    if (make_sum_func_list(all_fields, fields_list, true))
      goto err;
    // Loose index scan already de-duplicates; no DISTINCT aggregator needed.
    const bool need_distinct=
      !(tab->quick() && tab->quick()->is_agg_loose_index_scan());
    if (prepare_sum_aggregators(sum_funcs, need_distinct))
      goto err;
    if (setup_sum_funcs(thd, sum_funcs))
      goto err;
    group_list= NULL;   // Grouping is now handled by this tmp table
  }
  else
  {
    if (make_sum_func_list(all_fields, fields_list, false))
      goto err;
    const bool need_distinct=
      !(tab->quick() && tab->quick()->is_agg_loose_index_scan());
    if (prepare_sum_aggregators(sum_funcs, need_distinct))
      goto err;
    if (setup_sum_funcs(thd, sum_funcs))
      goto err;

    if (!group_list && !table->distinct && order && simple_order)
    {
      DBUG_PRINT("info",("Sorting for order"));
      THD_STAGE_INFO(thd, stage_sorting_for_order);

      if (ordered_index_usage != ordered_index_order_by &&
          add_sorting_to_table(const_tables, &order))
        goto err;
      order= NULL;      // Ordering is now handled by this tmp table
    }
  }
  DBUG_RETURN(false);

err:
  if (table != NULL)
  {
    free_tmp_table(thd, table);
    tab->set_table(NULL);
  }
  DBUG_RETURN(true);
}
323
324
325 /**
326 Send all rollup levels higher than the current one to the client.
327
328 @b SAMPLE
329 @code
SELECT a, b, c, SUM(b) FROM t1 GROUP BY a,b WITH ROLLUP
331 @endcode
332
333 @param idx Level we are on:
334 - 0 = Total sum level
335 - 1 = First group changed (a)
336 - 2 = Second group changed (a,b)
337
338 @retval
339 0 ok
340 @retval
341 1 If send_data_failed()
342 */
343
rollup_send_data(uint idx)344 int JOIN::rollup_send_data(uint idx)
345 {
346 uint i;
347 for (i= send_group_parts ; i-- > idx ; )
348 {
349 /* Get reference pointers to sum functions in place */
350 copy_ref_ptr_array(ref_ptrs, rollup.ref_pointer_arrays[i]);
351 if ((!having_cond || having_cond->val_int()))
352 {
353 if (send_records < unit->select_limit_cnt && do_send_rows &&
354 select_lex->query_result()->send_data(rollup.fields[i]))
355 return 1;
356 send_records++;
357 }
358 }
359 /* Restore ref_pointer_array */
360 set_items_ref_array(current_ref_ptrs);
361 return 0;
362 }
363
364
365 /**
366 Write all rollup levels higher than the current one to a temp table.
367
368 @b SAMPLE
369 @code
370 SELECT a, b, SUM(c) FROM t1 GROUP BY a,b WITH ROLLUP
371 @endcode
372
373 @param idx Level we are on:
374 - 0 = Total sum level
375 - 1 = First group changed (a)
376 - 2 = Second group changed (a,b)
377 @param table reference to temp table
378
379 @retval
380 0 ok
381 @retval
382 1 if write_data_failed()
383 */
384
rollup_write_data(uint idx,TABLE * table_arg)385 int JOIN::rollup_write_data(uint idx, TABLE *table_arg)
386 {
387 uint i;
388 for (i= send_group_parts ; i-- > idx ; )
389 {
390 /* Get reference pointers to sum functions in place */
391 copy_ref_ptr_array(ref_ptrs, rollup.ref_pointer_arrays[i]);
392 if ((!having_cond || having_cond->val_int()))
393 {
394 int write_error;
395 Item *item;
396 List_iterator_fast<Item> it(rollup.fields[i]);
397 while ((item= it++))
398 {
399 if (item->type() == Item::NULL_ITEM && item->is_result_field())
400 item->save_in_result_field(1);
401 }
402 copy_sum_funcs(sum_funcs_end[i+1], sum_funcs_end[i]);
403 if ((write_error= table_arg->file->ha_write_row(table_arg->record[0])))
404 {
405 if (create_ondisk_from_heap(thd, table_arg,
406 tmp_table_param.start_recinfo,
407 &tmp_table_param.recinfo,
408 write_error, FALSE, NULL))
409 return 1;
410 }
411 }
412 }
413 /* Restore ref_pointer_array */
414 set_items_ref_array(current_ref_ptrs);
415 return 0;
416 }
417
418
419 void
optimize_distinct()420 JOIN::optimize_distinct()
421 {
422 for (int i= primary_tables - 1; i >= 0; --i)
423 {
424 QEP_TAB *last_tab= qep_tab + i;
425 if (select_lex->select_list_tables & last_tab->table_ref->map())
426 break;
427 last_tab->not_used_in_distinct= true;
428 }
429
430 /* Optimize "select distinct b from t1 order by key_part_1 limit #" */
431 if (order && skip_sort_order)
432 {
433 /* Should already have been optimized away */
434 assert(ordered_index_usage == ordered_index_order_by);
435 if (ordered_index_usage == ordered_index_order_by)
436 {
437 order= NULL;
438 }
439 }
440 }
441
prepare_sum_aggregators(Item_sum ** func_ptr,bool need_distinct)442 bool prepare_sum_aggregators(Item_sum **func_ptr, bool need_distinct)
443 {
444 Item_sum *func;
445 DBUG_ENTER("prepare_sum_aggregators");
446 while ((func= *(func_ptr++)))
447 {
448 if (func->set_aggregator(need_distinct && func->has_with_distinct() ?
449 Aggregator::DISTINCT_AGGREGATOR :
450 Aggregator::SIMPLE_AGGREGATOR))
451 DBUG_RETURN(TRUE);
452 }
453 DBUG_RETURN(FALSE);
454 }
455
456
457 /******************************************************************************
458 Code for calculating functions
459 ******************************************************************************/
460
461
462 /**
463 Call ::setup for all sum functions.
464
465 @param thd thread handler
466 @param func_ptr sum function list
467
468 @retval
469 FALSE ok
470 @retval
471 TRUE error
472 */
473
setup_sum_funcs(THD * thd,Item_sum ** func_ptr)474 bool setup_sum_funcs(THD *thd, Item_sum **func_ptr)
475 {
476 Item_sum *func;
477 DBUG_ENTER("setup_sum_funcs");
478 while ((func= *(func_ptr++)))
479 {
480 if (func->aggregator_setup(thd))
481 DBUG_RETURN(TRUE);
482 }
483 DBUG_RETURN(FALSE);
484 }
485
486
487 static void
init_tmptable_sum_functions(Item_sum ** func_ptr)488 init_tmptable_sum_functions(Item_sum **func_ptr)
489 {
490 Item_sum *func;
491 while ((func= *(func_ptr++)))
492 func->reset_field();
493 }
494
495
496 /** Update record 0 in tmp_table from record 1. */
497
498 static void
update_tmptable_sum_func(Item_sum ** func_ptr,TABLE * tmp_table MY_ATTRIBUTE ((unused)))499 update_tmptable_sum_func(Item_sum **func_ptr,
500 TABLE *tmp_table MY_ATTRIBUTE((unused)))
501 {
502 Item_sum *func;
503 while ((func= *(func_ptr++)))
504 func->update_field();
505 }
506
507
508 /** Copy result of sum functions to record in tmp_table. */
509
510 static void
copy_sum_funcs(Item_sum ** func_ptr,Item_sum ** end_ptr)511 copy_sum_funcs(Item_sum **func_ptr, Item_sum **end_ptr)
512 {
513 for (; func_ptr != end_ptr ; func_ptr++)
514 (*func_ptr)->save_in_result_field(1);
515 return;
516 }
517
518
519 static bool
init_sum_functions(Item_sum ** func_ptr,Item_sum ** end_ptr)520 init_sum_functions(Item_sum **func_ptr, Item_sum **end_ptr)
521 {
522 for (; func_ptr != end_ptr ;func_ptr++)
523 {
524 if ((*func_ptr)->reset_and_add())
525 return 1;
526 }
527 /* If rollup, calculate the upper sum levels */
528 for ( ; *func_ptr ; func_ptr++)
529 {
530 if ((*func_ptr)->aggregator_add())
531 return 1;
532 }
533 return 0;
534 }
535
536
537 static bool
update_sum_func(Item_sum ** func_ptr)538 update_sum_func(Item_sum **func_ptr)
539 {
540 Item_sum *func;
541 for (; (func= *func_ptr) ; func_ptr++)
542 if (func->aggregator_add())
543 return 1;
544 return 0;
545 }
546
547 /**
548 Copy result of functions to record in tmp_table.
549
550 Uses the thread pointer to check for errors in
551 some of the val_xxx() methods called by the
552 save_in_result_field() function.
553 TODO: make the Item::val_xxx() return error code
554
555 @param func_ptr array of the function Items to copy to the tmp table
556 @param thd pointer to the current thread for error checking
557 @retval
558 FALSE if OK
559 @retval
560 TRUE on error
561 */
562
563 bool
copy_funcs(Func_ptr_array * func_ptr,const THD * thd)564 copy_funcs(Func_ptr_array *func_ptr, const THD *thd)
565 {
566 for (size_t ix= 0; ix < func_ptr->size(); ++ix)
567 {
568 Item *func= func_ptr->at(ix);
569 func->save_in_result_field(1);
570 /*
571 Need to check the THD error state because Item::val_xxx() don't
572 return error code, but can generate errors
573 TODO: change it for a real status check when Item::val_xxx()
574 are extended to return status code.
575 */
576 if (thd->is_error())
577 return TRUE;
578 }
579 return FALSE;
580 }
581
582 /*
583 end_select-compatible function that writes the record into a sjm temptable
584
585 SYNOPSIS
586 end_sj_materialize()
587 join The join
588 join_tab Last join table
589 end_of_records FALSE <=> This call is made to pass another record
590 combination
591 TRUE <=> EOF (no action)
592
593 DESCRIPTION
    This function is used by semi-join materialization to capture the subquery's
595 resultset and write it into the temptable (that is, materialize it).
596
597 NOTE
598 This function is used only for semi-join materialization. Non-semijoin
599 materialization uses different mechanism.
600
601 RETURN
602 NESTED_LOOP_OK
603 NESTED_LOOP_ERROR
604 */
605
static enum_nested_loop_state
end_sj_materialize(JOIN *join, QEP_TAB *qep_tab, bool end_of_records)
{
  int error;
  THD *thd= join->thd;
  // Materialization info is attached to the sjm-nest tab preceding this one.
  Semijoin_mat_exec *sjm= qep_tab[-1].sj_mat_exec();
  DBUG_ENTER("end_sj_materialize");
  if (!end_of_records)
  {
    TABLE *table= sjm->table;

    List_iterator<Item> it(sjm->sj_nest->nested_join->sj_inner_exprs);
    Item *item;
    while ((item= it++))
    {
      // A NULL inner expression can never match: skip the row entirely.
      if (item->is_null())
        DBUG_RETURN(NESTED_LOOP_OK);
    }
    fill_record(thd, table, table->visible_field_ptr(),
                sjm->sj_nest->nested_join->sj_inner_exprs,
                NULL, NULL);
    if (thd->is_error())
      DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
    // Duplicate rows are silently dropped from the materialized table.
    if (!check_unique_constraint(table))
      DBUG_RETURN(NESTED_LOOP_OK);
    if ((error= table->file->ha_write_row(table->record[0])))
    {
      /* create_ondisk_from_heap will generate error if needed */
      if (!table->file->is_ignorable_error(error))
      {
        // Heap table full: convert to an on-disk table and retry the write.
        if (create_ondisk_from_heap(thd, table,
                                    sjm->table_param.start_recinfo,
                                    &sjm->table_param.recinfo, error,
                                    TRUE, NULL))
          DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
        /* Initialize the index, since create_ondisk_from_heap does
           not replicate the earlier index initialization */
        if (table->hash_field)
          table->file->ha_index_init(0, false);
      }
    }
  }
  DBUG_RETURN(NESTED_LOOP_OK);
}
650
651
652 /**
653 Check appearance of new constant items in multiple equalities
654 of a condition after reading a constant table.
655
656 The function retrieves the cond condition and for each encountered
657 multiple equality checks whether new constants have appeared after
658 reading the constant (single row) table tab. If so it adjusts
659 the multiple equality appropriately.
660
661 @param thd thread handler
662 @param cond condition whose multiple equalities are to be checked
663 @param tab constant table that has been read
664 */
665
update_const_equal_items(THD * thd,Item * cond,JOIN_TAB * tab)666 static bool update_const_equal_items(THD *thd, Item *cond, JOIN_TAB *tab)
667 {
668 if (!(cond->used_tables() & tab->table_ref->map()))
669 return false;
670
671 if (cond->type() == Item::COND_ITEM)
672 {
673 List<Item> *cond_list= ((Item_cond*) cond)->argument_list();
674 List_iterator_fast<Item> li(*cond_list);
675 Item *item;
676 while ((item= li++))
677 {
678 if (update_const_equal_items(thd, item, tab))
679 return true;
680 }
681 }
682 else if (cond->type() == Item::FUNC_ITEM &&
683 ((Item_cond*) cond)->functype() == Item_func::MULT_EQUAL_FUNC)
684 {
685 Item_equal *item_equal= (Item_equal *) cond;
686 bool contained_const= item_equal->get_const() != NULL;
687 if (item_equal->update_const(thd))
688 return true;
689 if (!contained_const && item_equal->get_const())
690 {
691 /* Update keys for range analysis */
692 Item_equal_iterator it(*item_equal);
693 Item_field *item_field;
694 while ((item_field= it++))
695 {
696 Field *field= item_field->field;
697 JOIN_TAB *stat= field->table->reginfo.join_tab;
698 key_map possible_keys= field->key_start;
699 possible_keys.intersect(field->table->keys_in_use_for_query);
700 stat[0].const_keys.merge(possible_keys);
701 stat[0].keys().merge(possible_keys);
702
703 /*
704 For each field in the multiple equality (for which we know that it
705 is a constant) we have to find its corresponding key part, and set
706 that key part in const_key_parts.
707 */
708 if (!possible_keys.is_clear_all())
709 {
710 TABLE *const table= field->table;
711 for (Key_use *use= stat->keyuse();
712 use && use->table_ref == item_field->table_ref;
713 use++)
714 {
715 if (possible_keys.is_set(use->key) &&
716 table->key_info[use->key].key_part[use->keypart].field == field)
717 table->const_key_parts[use->key]|= use->keypart_map;
718 }
719 }
720 }
721 }
722 }
723 return false;
724 }
725
726 /**
727 For some reason, e.g. due to an impossible WHERE clause, the tables cannot
728 possibly contain any rows that will be in the result. This function
729 is used to return with a result based on no matching rows (i.e., an
730 empty result or one row with aggregates calculated without using
731 rows in the case of implicit grouping) before the execution of
732 nested loop join.
733
734 This function may evaluate the HAVING clause and is only meant for
735 result sets that are empty due to an impossible HAVING clause. Do
736 not use it if HAVING has already been evaluated.
737
738 @param join The join that does not produce a row
739 @param fields Fields in result
740 */
static void
return_zero_rows(JOIN *join, List<Item> &fields)
{
  DBUG_ENTER("return_zero_rows");

  // Release resources held by the join before producing the result.
  join->join_free();

  /* Update results for FOUND_ROWS */
  if (!join->send_row_on_empty_set())
  {
    join->thd->current_found_rows= 0;
  }

  SELECT_LEX *const select= join->select_lex;

  if (!(select->query_result()->send_result_set_metadata(fields,
                                                         Protocol::SEND_NUM_ROWS |
                                                         Protocol::SEND_EOF)))
  {
    bool send_error= FALSE;
    // Implicit grouping: send one row of aggregates computed over no rows.
    if (join->send_row_on_empty_set())
    {
      // Mark tables as containing only NULL values
      for (TABLE_LIST *table= select->leaf_tables; table;
           table= table->next_leaf)
        table->table->set_null_row();

      // Calculate aggregate functions for no rows

      /*
        Must notify all fields that there are no rows (not only those
        that will be returned) because join->having may refer to
        fields that are not part of the result columns.
      */
      List_iterator_fast<Item> it(join->all_fields);
      Item *item;
      while ((item= it++))
        item->no_rows_in_result();

      // HAVING may still reject the single aggregate row.
      if (!join->having_cond || join->having_cond->val_int())
        send_error= select->query_result()->send_data(fields);
    }
    if (!send_error)
      select->query_result()->send_eof();                 // Should be safe
  }
  DBUG_VOID_RETURN;
}
788
789
790 /**
791 @brief Setup write_func of QEP_tmp_table object
792
793 @param join_tab JOIN_TAB of a tmp table
794
795 @details
796 Function sets up write_func according to how QEP_tmp_table object that
797 is attached to the given join_tab will be used in the query.
798 */
799
void setup_tmptable_write_func(QEP_TAB *tab)
{
  JOIN *join= tab->join();
  TABLE *table= tab->table();
  QEP_tmp_table *op= (QEP_tmp_table *)tab->op;
  Temp_table_param *const tmp_tbl= tab->tmp_table_param;

  assert(table && op);

  if (table->group && tmp_tbl->sum_func_count &&
      !tmp_tbl->precomputed_group_by)
  {
    /*
      Note for MyISAM tmp tables: if uniques is true keys won't be
      created.
    */
    if (table->s->keys)
    {
      // Grouped aggregation keyed on the group columns: update in place.
      DBUG_PRINT("info",("Using end_update"));
      op->set_write_func(end_update);
    }
  }
  else if (join->sort_and_group && !tmp_tbl->precomputed_group_by)
  {
    // Rows arrive sorted by group: finalize each group as it ends.
    DBUG_PRINT("info",("Using end_write_group"));
    op->set_write_func(end_write_group);
  }
  else
  {
    // Plain materialization: write every row as it comes.
    DBUG_PRINT("info",("Using end_write"));
    op->set_write_func(end_write);
    if (tmp_tbl->precomputed_group_by)
    {
      /*
        Aggregates were already computed upstream; copy their values
        into the tmp table like ordinary columns.
      */
      Item_sum **func_ptr= join->sum_funcs;
      Item_sum *func;
      while ((func= *(func_ptr++)))
      {
        tmp_tbl->items_to_copy->push_back(func);
      }
    }
  }
}
842
843
844 /**
845 @details
846 Rows produced by a join sweep may end up in a temporary table or be sent
847 to a client. Setup the function of the nested loop join algorithm which
848 handles final fully constructed and matched records.
849
850 @return
851 end_select function to use. This function can't fail.
852 */
get_end_select_func()853 Next_select_func JOIN::get_end_select_func()
854 {
855 /*
856 Choose method for presenting result to user. Use end_send_group
857 if the query requires grouping (has a GROUP BY clause and/or one or
858 more aggregate functions). Use end_send if the query should not
859 be grouped.
860 */
861 if (sort_and_group && !tmp_table_param.precomputed_group_by)
862 {
863 DBUG_PRINT("info",("Using end_send_group"));
864 return end_send_group;
865 }
866 DBUG_PRINT("info",("Using end_send"));
867 return end_send;
868 }
869
870
871 /**
872 Make a join of all tables and write it on socket or to table.
873
874 @retval
875 0 if ok
876 @retval
877 1 if error is sent
878 @retval
879 -1 if error should be sent
880 */
881
static int
do_select(JOIN *join)
{
  int rc= 0;
  enum_nested_loop_state error= NESTED_LOOP_OK;
  DBUG_ENTER("do_select");

  join->send_records=0;
  if (join->plan_is_const() && !join->need_tmp)
  {
    // All tables are const: at most one row, produced without a join loop.
    Next_select_func end_select= join->get_end_select_func();
    /*
      HAVING will be checked after processing aggregate functions,
      But WHERE should be checked here (we already have read tables)

      @todo: consider calling end_select instead of duplicating code
    */
    if (!join->where_cond || join->where_cond->val_int())
    {
      // HAVING will be checked by end_select
      error= (*end_select)(join, 0, 0);
      if (error >= NESTED_LOOP_OK)
        error= (*end_select)(join, 0, 1);   // Signal end-of-records

      /*
        If we don't go through evaluate_join_record(), do the counting
        here. join->send_records is increased on success in end_send(),
        so we don't touch it here.
      */
      join->examined_rows++;
      assert(join->examined_rows <= 1);
    }
    else if (join->send_row_on_empty_set())
    {
      // Implicit grouping: emit one aggregate row computed over no rows.
      table_map save_nullinfo= 0;
      /*
        If this is a subquery, we need to save and later restore
        the const table NULL info before clearing the tables
        because the following executions of the subquery do not
        reevaluate constant fields. @see save_const_null_info
        and restore_const_null_info
      */
      if (join->select_lex->master_unit()->item && join->const_tables)
        save_const_null_info(join, &save_nullinfo);

      // Calculate aggregate functions for no rows
      List_iterator_fast<Item> it(*join->fields);
      Item *item;
      while ((item= it++))
        item->no_rows_in_result();

      // Mark tables as containing only NULL values
      if (join->clear())
        error= NESTED_LOOP_ERROR;
      else
      {
        if (!join->having_cond || join->having_cond->val_int())
          rc= join->select_lex->query_result()->send_data(*join->fields);

        if (save_nullinfo)
          restore_const_null_info(join, save_nullinfo);
      }
    }
    /*
      An error can happen when evaluating the conds
      (the join condition and piece of where clause
      relevant to this join table).
    */
    if (join->thd->is_error())
      error= NESTED_LOOP_ERROR;
  }
  else
  {
    // General case: run the nested-loop join from the first non-const table.
    QEP_TAB *qep_tab= join->qep_tab + join->const_tables;
    assert(join->primary_tables);
    error= join->first_select(join,qep_tab,0);
    if (error >= NESTED_LOOP_OK)
      error= join->first_select(join,qep_tab,1);   // End-of-records signal
  }

  join->thd->current_found_rows= join->send_records;
  /*
    For "order by with limit", we cannot rely on send_records, but need
    to use the rowcount read originally into the join_tab applying the
    filesort. There cannot be any post-filtering conditions, nor any
    following join_tabs in this case, so this rowcount properly represents
    the correct number of qualifying rows.
  */
  if (join->qep_tab && join->order)
  {
    // Save # of found records prior to cleanup
    QEP_TAB *sort_tab;
    uint const_tables= join->const_tables;

    // Take record count from first non constant table or from last tmp table
    if (join->tmp_tables > 0)
      sort_tab= &join->qep_tab[join->primary_tables + join->tmp_tables - 1];
    else
    {
      assert(!join->plan_is_const());
      sort_tab= &join->qep_tab[const_tables];
    }
    if (sort_tab->filesort &&
        join->calc_found_rows &&
        sort_tab->filesort->sortorder &&
        sort_tab->filesort->limit != HA_POS_ERROR)
    {
      join->thd->current_found_rows= sort_tab->records();
    }
  }

  {
    /*
      The following will unlock all cursors if the command wasn't an
      update command
    */
    join->join_free();                          // Unlock all cursors
  }
  if (error == NESTED_LOOP_OK)
  {
    /*
      Sic: this branch works even if rc != 0, e.g. when
      send_data above returns an error.
    */
    if (join->select_lex->query_result()->send_eof())
      rc= 1;                                    // Don't send error
    DBUG_PRINT("info",("%ld records output", (long) join->send_records));
  }
  else
    rc= -1;
#ifndef NDEBUG
  if (rc)
  {
    DBUG_PRINT("error",("Error: do_select() failed"));
  }
#endif
  rc= join->thd->is_error() ? -1 : rc;
  DBUG_RETURN(rc);
}
1021
1022
1023 /**
1024 @brief Accumulate full or partial join result in operation and send
1025 operation's result further.
1026
1027 @param join pointer to the structure providing all context info for the query
1028 @param join_tab the JOIN_TAB object to which the operation is attached
1029 @param end_records TRUE <=> all records were accumulated, send them further
1030
1031 @details
1032 This function accumulates records, one by one, in QEP operation's buffer by
1033 calling op->put_record(). When there is no more records to save, in this
1034 case the end_of_records argument == true, function tells QEP operation to
1035 send records further by calling op->send_records().
1036 When all records are sent this function passes 'end_of_records' signal
1037 further by calling sub_select() with end_of_records argument set to
1038 true. After that op->end_send() is called to tell QEP operation that
1039 it could end internal buffer scan.
1040
1041 @note
1042 This function is not expected to be called when dynamic range scan is
1043 used to scan join_tab because join cache is disabled for such scan
1044 and range scans aren't used for tmp tables.
1045 @see setup_join_buffering
1046 For caches the function implements the algorithmic schema for both
1047 Blocked Nested Loop Join and Batched Key Access Join. The difference can
  be seen only at the level of the implementation of the put_record and
1049 send_records virtual methods for the cache object associated with the
1050 join_tab.
1051
1052 @return
1053 return one of enum_nested_loop_state.
1054 */
1055
1056 enum_nested_loop_state
sub_select_op(JOIN * join,QEP_TAB * qep_tab,bool end_of_records)1057 sub_select_op(JOIN *join, QEP_TAB *qep_tab, bool end_of_records)
1058 {
1059 DBUG_ENTER("sub_select_op");
1060
1061 if (join->thd->killed)
1062 {
1063 /* The user has aborted the execution of the query */
1064 join->thd->send_kill_message();
1065 DBUG_RETURN(NESTED_LOOP_KILLED);
1066 }
1067
1068 enum_nested_loop_state rc;
1069 QEP_operation *op= qep_tab->op;
1070
1071 /* This function cannot be called if qep_tab has no associated operation */
1072 assert(op != NULL);
1073
1074 if (end_of_records)
1075 {
1076 rc= op->end_send();
1077 if (rc >= NESTED_LOOP_OK)
1078 rc= sub_select(join, qep_tab, end_of_records);
1079 DBUG_RETURN(rc);
1080 }
1081 if (qep_tab->prepare_scan())
1082 DBUG_RETURN(NESTED_LOOP_ERROR);
1083
1084 /*
1085 setup_join_buffering() disables join buffering if QS_DYNAMIC_RANGE is
1086 enabled.
1087 */
1088 assert(!qep_tab->dynamic_range());
1089
1090 rc= op->put_record();
1091
1092 DBUG_RETURN(rc);
1093 }
1094
1095
1096 /**
  Retrieve all records that extend a given partial join prefix, from the
  result of a join.
1098
  SYNOPSIS
1100 sub_select()
1101 join pointer to the structure providing all context info for the query
1102 join_tab the first next table of the execution plan to be retrieved
    end_records true when we need to perform final steps of retrieval
1104
1105 DESCRIPTION
1106 For a given partial join record consisting of records from the tables
1107 preceding the table join_tab in the execution plan, the function
1108 retrieves all matching full records from the result set and
1109 send them to the result set stream.
1110
1111 @note
1112 The function effectively implements the final (n-k) nested loops
1113 of nested loops join algorithm, where k is the ordinal number of
1114 the join_tab table and n is the total number of tables in the join query.
1115 It performs nested loops joins with all conjunctive predicates from
1116 the where condition pushed as low to the tables as possible.
1117 E.g. for the query
1118 @code
1119 SELECT * FROM t1,t2,t3
1120 WHERE t1.a=t2.a AND t2.b=t3.b AND t1.a BETWEEN 5 AND 9
1121 @endcode
1122 the predicate (t1.a BETWEEN 5 AND 9) will be pushed to table t1,
1123 given the selected plan prescribes to nest retrievals of the
1124 joined tables in the following order: t1,t2,t3.
1125 A pushed down predicate are attached to the table which it pushed to,
1126 at the field join_tab->cond.
1127 When executing a nested loop of level k the function runs through
1128 the rows of 'join_tab' and for each row checks the pushed condition
1129 attached to the table.
1130 If it is false the function moves to the next row of the
1131 table. If the condition is true the function recursively executes (n-k-1)
1132 remaining embedded nested loops.
1133 The situation becomes more complicated if outer joins are involved in
1134 the execution plan. In this case the pushed down predicates can be
1135 checked only at certain conditions.
1136 Suppose for the query
1137 @code
1138 SELECT * FROM t1 LEFT JOIN (t2,t3) ON t3.a=t1.a
1139 WHERE t1>2 AND (t2.b>5 OR t2.b IS NULL)
1140 @endcode
1141 the optimizer has chosen a plan with the table order t1,t2,t3.
1142 The predicate P1=t1>2 will be pushed down to the table t1, while the
1143 predicate P2=(t2.b>5 OR t2.b IS NULL) will be attached to the table
1144 t2. But the second predicate can not be unconditionally tested right
1145 after a row from t2 has been read. This can be done only after the
1146 first row with t3.a=t1.a has been encountered.
1147 Thus, the second predicate P2 is supplied with a guarded value that are
1148 stored in the field 'found' of the first inner table for the outer join
1149 (table t2). When the first row with t3.a=t1.a for the current row
1150 of table t1 appears, the value becomes true. For now on the predicate
1151 is evaluated immediately after the row of table t2 has been read.
1152 When the first row with t3.a=t1.a has been encountered all
1153 conditions attached to the inner tables t2,t3 must be evaluated.
1154 Only when all of them are true the row is sent to the output stream.
1155 If not, the function returns to the lowest nest level that has a false
1156 attached condition.
1157 The predicates from on expressions are also pushed down. If in the
1158 the above example the on expression were (t3.a=t1.a AND t2.a=t1.a),
1159 then t1.a=t2.a would be pushed down to table t2, and without any
1160 guard.
1161 If after the run through all rows of table t2, the first inner table
1162 for the outer join operation, it turns out that no matches are
1163 found for the current row of t1, then current row from table t1
1164 is complemented by nulls for t2 and t3. Then the pushed down predicates
1165 are checked for the composed row almost in the same way as it had
1166 been done for the first row with a match. The only difference is
1167 the predicates from on expressions are not checked.
1168
1169 @par
1170 @b IMPLEMENTATION
1171 @par
1172 The function forms output rows for a current partial join of k
1173 tables tables recursively.
1174 For each partial join record ending with a certain row from
1175 join_tab it calls sub_select that builds all possible matching
1176 tails from the result set.
1177 To be able check predicates conditionally items of the class
1178 Item_func_trig_cond are employed.
1179 An object of this class is constructed from an item of class COND
1180 and a pointer to a guarding boolean variable.
1181 When the value of the guard variable is true the value of the object
1182 is the same as the value of the predicate, otherwise it's just returns
1183 true.
1184 To carry out a return to a nested loop level of join table t the pointer
1185 to t is remembered in the field 'return_tab' of the join structure.
1186 Consider the following query:
1187 @code
1188 SELECT * FROM t1,
1189 LEFT JOIN
1190 (t2, t3 LEFT JOIN (t4,t5) ON t5.a=t3.a)
1191 ON t4.a=t2.a
1192 WHERE (t2.b=5 OR t2.b IS NULL) AND (t4.b=2 OR t4.b IS NULL)
1193 @endcode
1194 Suppose the chosen execution plan dictates the order t1,t2,t3,t4,t5
1195 and suppose for a given joined rows from tables t1,t2,t3 there are
1196 no rows in the result set yet.
1197 When first row from t5 that satisfies the on condition
1198 t5.a=t3.a is found, the pushed down predicate t4.b=2 OR t4.b IS NULL
1199 becomes 'activated', as well the predicate t4.a=t2.a. But
1200 the predicate (t2.b=5 OR t2.b IS NULL) can not be checked until
1201 t4.a=t2.a becomes true.
1202 In order not to re-evaluate the predicates that were already evaluated
1203 as attached pushed down predicates, a pointer to the the first
1204 most inner unmatched table is maintained in join_tab->first_unmatched.
1205 Thus, when the first row from t5 with t5.a=t3.a is found
1206 this pointer for t5 is changed from t4 to t2.
1207
1208 @par
1209 @b STRUCTURE @b NOTES
1210 @par
1211 join_tab->first_unmatched points always backwards to the first inner
1212 table of the embedding nested join, if any.
1213
1214 @param join pointer to the structure providing all context info for
1215 the query
1216 @param join_tab the first next table of the execution plan to be retrieved
  @param end_records true when we need to perform final steps of retrieval
1218
1219 @return
1220 return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS.
1221 */
1222
enum_nested_loop_state
sub_select(JOIN *join, QEP_TAB *const qep_tab,bool end_of_records)
{
  DBUG_ENTER("sub_select");

  // Starting a fresh scan of this table: it is not (yet) a
  // NULL-complemented row.
  qep_tab->table()->reset_null_row();

  if (end_of_records)
  {
    // Nothing to scan here; just propagate the end-of-records signal to
    // the next plan node.
    enum_nested_loop_state nls=
      (*qep_tab->next_select)(join,qep_tab+1,end_of_records);
    DBUG_RETURN(nls);
  }
  READ_RECORD *info= &qep_tab->read_record;

  // Materialize derived table / semi-join if needed, bind rowid buffer.
  if (qep_tab->prepare_scan())
    DBUG_RETURN(NESTED_LOOP_ERROR);

  if (qep_tab->starts_weedout())
  {
    // New scan of a weedout range: forget row combinations seen in
    // previous executions of this range.
    do_sj_reset(qep_tab->flush_weedout_table);
  }

  const plan_idx qep_tab_idx= qep_tab->idx();
  join->return_tab= qep_tab_idx;
  qep_tab->not_null_compl= true;
  qep_tab->found_match= false;

  if (qep_tab->last_inner() != NO_PLAN_IDX)
  {
    /* qep_tab is the first inner table for an outer join operation. */

    /* Set initial state of guard variables for this table.*/
    qep_tab->found= false;

    /* Set first_unmatched for the last inner table of this group */
    QEP_AT(qep_tab, last_inner()).first_unmatched= qep_tab_idx;
  }
  if (qep_tab->do_firstmatch() || qep_tab->do_loosescan())
  {
    /*
      qep_tab is the first table of a LooseScan range, or has a "jump"
      address in a FirstMatch range.
      Reset the matching for this round of execution.
    */
    QEP_AT(qep_tab, match_tab).found_match= false;
  }

  join->thd->get_stmt_da()->reset_current_row_for_condition();

  enum_nested_loop_state rc= NESTED_LOOP_OK;
  bool in_first_read= true;
  // Batched PSI mode reduces per-row instrumentation overhead for this scan.
  const bool pfs_batch_update= qep_tab->pfs_batch_update(join);
  if (pfs_batch_update)
    qep_tab->table()->file->start_psi_batch_mode();
  /*
    Read rows until end-of-scan, error/kill, or until an evaluated predicate
    requests a return to a table earlier in the plan (join->return_tab
    becomes smaller than this table's index).
  */
  while (rc == NESTED_LOOP_OK && join->return_tab >= qep_tab_idx)
  {
    int error;
    if (in_first_read)
    {
      in_first_read= false;
      error= (*qep_tab->read_first_record)(qep_tab);
    }
    else
      error= info->read_record(info);

    DBUG_EXECUTE_IF("bug13822652_1", join->thd->killed= THD::KILL_QUERY;);

    if (error > 0 || (join->thd->is_error()))   // Fatal error
      rc= NESTED_LOOP_ERROR;
    else if (error < 0)
      break;                                    // No more rows in this scan
    else if (join->thd->killed)			// Aborted by user
    {
      join->thd->send_kill_message();
      rc= NESTED_LOOP_KILLED;
    }
    else
    {
      // Remember the row's position when a later operation (e.g. weedout)
      // needs this table's rowid.
      if (qep_tab->keep_current_rowid)
        qep_tab->table()->file->position(qep_tab->table()->record[0]);
      rc= evaluate_join_record(join, qep_tab);
    }
  }

  // Outer join: no match was found for the current outer row, so produce
  // a NULL-complemented row for this group of inner tables.
  if (rc == NESTED_LOOP_OK &&
      qep_tab->last_inner() != NO_PLAN_IDX &&
      !qep_tab->found)
    rc= evaluate_null_complemented_join_record(join, qep_tab);

  if (pfs_batch_update)
    qep_tab->table()->file->end_psi_batch_mode();

  DBUG_RETURN(rc);
}
1318
1319
1320 /**
1321 @brief Prepare table to be scanned.
1322
1323 @details This function is the place to do any work on the table that
1324 needs to be done before table can be scanned. Currently it
1325 only materialized derived tables and semi-joined subqueries and binds
1326 buffer for current rowid.
1327
1328 @returns false - Ok, true - error
1329 */
1330
prepare_scan()1331 bool QEP_TAB::prepare_scan()
1332 {
1333 // Check whether materialization is required.
1334 if (!materialize_table || materialized)
1335 return false;
1336
1337 // Materialize table prior to reading it
1338 if ((*materialize_table)(this))
1339 return true;
1340
1341 materialized= true;
1342
1343 // Bind to the rowid buffer managed by the TABLE object.
1344 if (copy_current_rowid)
1345 copy_current_rowid->bind_buffer(table()->file->ref);
1346
1347 return false;
1348 }
1349
1350
1351 /**
1352 SemiJoinDuplicateElimination: Weed out duplicate row combinations
1353
  SYNOPSIS
1355 do_sj_dups_weedout()
1356 thd Thread handle
1357 sjtbl Duplicate weedout table
1358
1359 DESCRIPTION
1360 Try storing current record combination of outer tables (i.e. their
1361 rowids) in the temporary table. This records the fact that we've seen
1362 this record combination and also tells us if we've seen it before.
1363
1364 RETURN
1365 -1 Error
1366 1 The row combination is a duplicate (discard it)
1367 0 The row combination is not a duplicate (continue)
1368 */
1369
int do_sj_dups_weedout(THD *thd, SJ_TMP_TABLE *sjtbl)
{
  int error;
  SJ_TMP_TABLE::TAB *tab= sjtbl->tabs;
  SJ_TMP_TABLE::TAB *tab_end= sjtbl->tabs_end;

  DBUG_ENTER("do_sj_dups_weedout");

  /*
    Confluent weedout: the range can produce at most one distinct row
    combination, so a single boolean flag replaces the temporary table.
  */
  if (sjtbl->is_confluent)
  {
    if (sjtbl->have_confluent_row)
      DBUG_RETURN(1);                   // Already seen: a duplicate
    else
    {
      sjtbl->have_confluent_row= TRUE;
      DBUG_RETURN(0);
    }
  }

  uchar *ptr= sjtbl->tmp_table->visible_field_ptr()[0]->ptr;
  // Put the rowids tuple into table->record[0]:
  // 1. Store the length (1 or 2 length bytes, per the varstring field)
  if (((Field_varstring*)(sjtbl->tmp_table->visible_field_ptr()[0]))->
      length_bytes == 1)
  {
    *ptr= (uchar)(sjtbl->rowid_len + sjtbl->null_bytes);
    ptr++;
  }
  else
  {
    int2store(ptr, sjtbl->rowid_len + sjtbl->null_bytes);
    ptr += 2;
  }

  // 2. Zero the null bytes
  uchar *const nulls_ptr= ptr;
  if (sjtbl->null_bytes)
  {
    memset(ptr, 0, sjtbl->null_bytes);
    ptr += sjtbl->null_bytes;
  }

  // 3. Put the rowids (one slot per table of the weedout range)
  for (uint i=0; tab != tab_end; tab++, i++)
  {
    handler *h= tab->qep_tab->table()->file;
    if (tab->qep_tab->table()->is_nullable() &&
        tab->qep_tab->table()->has_null_row())
    {
      /* It's a NULL-complemented row */
      *(nulls_ptr + tab->null_byte) |= tab->null_bit;
      memset(ptr + tab->rowid_offset, 0, h->ref_length);
    }
    else
    {
      /* Copy the rowid value */
      memcpy(ptr + tab->rowid_offset, h->ref, h->ref_length);
    }
  }

  // Hash-based uniqueness check first: avoids the write when the tuple
  // is already known to be a duplicate.
  if (!check_unique_constraint(sjtbl->tmp_table))
    DBUG_RETURN(1);
  error= sjtbl->tmp_table->file->ha_write_row(sjtbl->tmp_table->record[0]);
  if (error)
  {
    /* If this is a duplicate error, return immediately */
    if (sjtbl->tmp_table->file->is_ignorable_error(error))
      DBUG_RETURN(1);
    /*
      Other error than duplicate error: Attempt to create a temporary table.
    */
    bool is_duplicate;
    if (create_ondisk_from_heap(thd, sjtbl->tmp_table,
                                sjtbl->start_recinfo, &sjtbl->recinfo,
                                error, TRUE, &is_duplicate))
      DBUG_RETURN(-1);                  // Could not spill HEAP table to disk
    DBUG_RETURN(is_duplicate ? 1 : 0);
  }
  DBUG_RETURN(0);
}
1450
1451
1452 /**
1453 SemiJoinDuplicateElimination: Reset the temporary table
1454 */
1455
do_sj_reset(SJ_TMP_TABLE * sj_tbl)1456 static int do_sj_reset(SJ_TMP_TABLE *sj_tbl)
1457 {
1458 DBUG_ENTER("do_sj_reset");
1459 if (sj_tbl->tmp_table)
1460 {
1461 int rc= sj_tbl->tmp_table->file->ha_delete_all_rows();
1462 DBUG_RETURN(rc);
1463 }
1464 sj_tbl->have_confluent_row= FALSE;
1465 DBUG_RETURN(0);
1466 }
1467
1468 /**
1469 @brief Process one row of the nested loop join.
1470
1471 This function will evaluate parts of WHERE/ON clauses that are
1472 applicable to the partial row on hand and in case of success
1473 submit this row to the next level of the nested loop.
1474 join_tab->return_tab may be modified to cause a return to a previous
1475 join_tab.
1476
1477 @param join - The join object
1478 @param join_tab - The most inner join_tab being processed
1479
1480 @return Nested loop state
1481 */
1482
static enum_nested_loop_state
evaluate_join_record(JOIN *join, QEP_TAB *const qep_tab)
{
  bool not_used_in_distinct= qep_tab->not_used_in_distinct;
  ha_rows found_records=join->found_records;
  Item *condition= qep_tab->condition();
  const plan_idx qep_tab_idx= qep_tab->idx();
  bool found= TRUE;
  DBUG_ENTER("evaluate_join_record");
  DBUG_PRINT("enter",
             ("join: %p join_tab index: %d table: %s cond: %p",
              join, static_cast<int>(qep_tab_idx),
              qep_tab->table()->alias, condition));

  if (condition)
  {
    // Evaluate the condition pushed down to this table.
    found= MY_TEST(condition->val_int());

    if (join->thd->killed)
    {
      join->thd->send_kill_message();
      DBUG_RETURN(NESTED_LOOP_KILLED);
    }

    /* check for errors evaluating the condition */
    if (join->thd->is_error())
      DBUG_RETURN(NESTED_LOOP_ERROR);
  }
  if (found)
  {
    /*
      There is no condition on this join_tab or the attached pushed down
      condition is true => a match is found.
    */
    while (qep_tab->first_unmatched != NO_PLAN_IDX && found)
    {
      /*
        The while condition is always false if join_tab is not
        the last inner join table of an outer join operation.
      */
      QEP_TAB *first_unmatched= &QEP_AT(qep_tab, first_unmatched);
      /*
        Mark that a match for the current row of the outer table is found.
        This activates WHERE clause predicates attached the inner tables of
        the outer join.
      */
      first_unmatched->found= true;
      // Re-check conditions guarded by 'found' for the whole inner range.
      for (QEP_TAB *tab= first_unmatched; tab <= qep_tab; tab++)
      {
        /*
          Check all predicates that have just been activated.

          Actually all predicates non-guarded by first_unmatched->found
          will be re-evaluated again. It could be fixed, but, probably,
          it's not worth doing now.

          not_exists_optimize has been created from a
          condition containing 'is_null'. This 'is_null'
          predicate is still present on any 'tab' with
          'not_exists_optimize'. Furthermore, the usual rules
          for condition guards also applies for
          'not_exists_optimize' -> When 'is_null==false' we
          know all cond. guards are open and we can apply
          the 'not_exists_optimize'.
        */
        assert(!(tab->table()->reginfo.not_exists_optimize &&
                 !tab->condition()));

        if (tab->condition() && !tab->condition()->val_int())
        {
          /* The condition attached to table tab is false */

          if (tab->table()->reginfo.not_exists_optimize)
          {
            /*
              When not_exists_optimizer is set and a matching row is found, the
              outer row should be excluded from the result set: no need to
              explore this record, thus we don't call the next_select.
              And, no need to explore other following records of 'tab', so we
              set join->return_tab.
              As we set join_tab->found above, evaluate_join_record() at the
              upper level will not yield a NULL-complemented record.
              Note that the calculation below can set return_tab to -1
              i.e. PRE_FIRST_PLAN_IDX.
            */
            join->return_tab= qep_tab_idx - 1;
            DBUG_RETURN(NESTED_LOOP_OK);
          }

          if (tab == qep_tab)
            found= 0;
          else
          {
            /*
              Set a return point if rejected predicate is attached
              not to the last table of the current nest level.
            */
            join->return_tab= tab->idx();
            DBUG_RETURN(NESTED_LOOP_OK);
          }
        }
        /* check for errors evaluating the condition */
        if (join->thd->is_error())
          DBUG_RETURN(NESTED_LOOP_ERROR);
      }
      /*
        Check whether join_tab is not the last inner table
        for another embedding outer join.
      */
      plan_idx f_u= first_unmatched->first_upper();
      if (f_u != NO_PLAN_IDX && join->qep_tab[f_u].last_inner() != qep_tab_idx)
        f_u= NO_PLAN_IDX;
      qep_tab->first_unmatched= f_u;
    }

    plan_idx return_tab= join->return_tab;

    if (qep_tab->finishes_weedout() && found)
    {
      // Last table of a weedout range: discard duplicate row combinations.
      int res= do_sj_dups_weedout(join->thd, qep_tab->check_weed_out_table);
      if (res == -1)
        DBUG_RETURN(NESTED_LOOP_ERROR);
      else if (res == 1)
        found= FALSE;
    }
    else if (qep_tab->do_loosescan() &&
             QEP_AT(qep_tab, match_tab).found_match)
    {
      /*
        Loosescan algorithm requires an access method that gives 'sorted'
        retrieval of keys, or an access method that provides only one
        row (which is inherently sorted).
        EQ_REF and LooseScan may happen if dependencies in subquery (e.g.,
        outer join) prevents table pull-out.
      */
      assert(qep_tab->use_order() || qep_tab->type() == JT_EQ_REF);

      /*
        Previous row combination for duplicate-generating range,
        generated a match.  Compare keys of this row and previous row
        to determine if this is a duplicate that should be skipped.
      */
      if (key_cmp(qep_tab->table()->key_info[qep_tab->index()].key_part,
                  qep_tab->loosescan_buf, qep_tab->loosescan_key_len))
        /*
          Keys do not match.
          Reset found_match for last table of duplicate-generating range,
          to avoid comparing keys until a new match has been found.
        */
        QEP_AT(qep_tab, match_tab).found_match= false;
      else
        found= false;
    }

    /*
      It was not just a return to lower loop level when one
      of the newly activated predicates is evaluated as false
      (See above join->return_tab= tab).
    */
    join->examined_rows++;
    DBUG_PRINT("counts", ("evaluate_join_record join->examined_rows++: %lu",
                          (ulong) join->examined_rows));

    if (found)
    {
      enum enum_nested_loop_state rc;
      // A match is found for the current partial join prefix.
      qep_tab->found_match= true;

      // Recurse into the next table of the plan.
      rc= (*qep_tab->next_select)(join, qep_tab+1, 0);
      join->thd->get_stmt_da()->inc_current_row_for_condition();
      if (rc != NESTED_LOOP_OK)
        DBUG_RETURN(rc);

      /* check for errors evaluating the condition */
      if (join->thd->is_error())
        DBUG_RETURN(NESTED_LOOP_ERROR);

      if (qep_tab->do_loosescan() &&
          QEP_AT(qep_tab,match_tab).found_match)
      {
        /*
          A match was found for a duplicate-generating range of a semijoin.
          Copy key to be able to determine whether subsequent rows
          will give duplicates that should be skipped.
        */
        KEY *key= qep_tab->table()->key_info + qep_tab->index();
        key_copy(qep_tab->loosescan_buf, qep_tab->table()->record[0],
                 key, qep_tab->loosescan_key_len);
      }
      else if (qep_tab->do_firstmatch() &&
               QEP_AT(qep_tab, match_tab).found_match)
      {
        /*
          We should return to join_tab->firstmatch_return after we have
          enumerated all the suffixes for current prefix row combination
        */
        set_if_smaller(return_tab, qep_tab->firstmatch_return);
      }

      /*
        Test if this was a SELECT DISTINCT query on a table that
        was not in the field list;  In this case we can abort if
        we found a row, as no new rows can be added to the result.
      */
      if (not_used_in_distinct && found_records != join->found_records)
        set_if_smaller(return_tab, qep_tab_idx - 1);

      set_if_smaller(join->return_tab, return_tab);
    }
    else
    {
      join->thd->get_stmt_da()->inc_current_row_for_condition();
      if (qep_tab->not_null_compl)
      {
        /* a NULL-complemented row is not in a table so cannot be locked */
        qep_tab->read_record.unlock_row(qep_tab);
      }
    }
  }
  else
  {
    /*
      The condition pushed down to the table join_tab rejects all rows
      with the beginning coinciding with the current partial join.
    */
    join->examined_rows++;
    join->thd->get_stmt_da()->inc_current_row_for_condition();
    if (qep_tab->not_null_compl)
      qep_tab->read_record.unlock_row(qep_tab);
  }
  DBUG_RETURN(NESTED_LOOP_OK);
}
1716
1717
1718 /**
1719
1720 @details
    Construct a NULL-complemented partial join record and feed it to the next
1722 level of the nested loop. This function is used in case we have
1723 an OUTER join and no matching record was found.
1724 */
1725
static enum_nested_loop_state
evaluate_null_complemented_join_record(JOIN *join, QEP_TAB *qep_tab)
{
  /*
    The table join_tab is the first inner table of a outer join operation
    and no matches has been found for the current outer row.
  */
  QEP_TAB *first_inner_tab= qep_tab;
  QEP_TAB *last_inner_tab= &QEP_AT(qep_tab, last_inner());

  DBUG_ENTER("evaluate_null_complemented_join_record");

  // NULL-complement every inner table of this outer join nest.
  for ( ; qep_tab <= last_inner_tab ; qep_tab++)
  {
    // Make sure that the rowid buffer is bound, duplicates weedout needs it
    if (qep_tab->copy_current_rowid &&
        !qep_tab->copy_current_rowid->buffer_is_bound())
      qep_tab->copy_current_rowid->bind_buffer(qep_tab->table()->file->ref);

    /* Change the the values of guard predicate variables. */
    qep_tab->found= true;
    qep_tab->not_null_compl= false;
    /* The outer row is complemented by nulls for each inner tables */
    restore_record(qep_tab->table(),s->default_values);  // Make empty record
    qep_tab->table()->set_null_row();       // For group by without error
    if (qep_tab->starts_weedout() && qep_tab > first_inner_tab)
    {
      // sub_select() has not performed a reset for this table.
      do_sj_reset(qep_tab->flush_weedout_table);
    }
    /* Check all attached conditions for inner table rows. */
    if (qep_tab->condition() && !qep_tab->condition()->val_int())
    {
      if (join->thd->killed)
      {
        join->thd->send_kill_message();
        DBUG_RETURN(NESTED_LOOP_KILLED);
      }

      /* check for errors */
      if (join->thd->is_error())
        DBUG_RETURN(NESTED_LOOP_ERROR);
      else
        DBUG_RETURN(NESTED_LOOP_OK);      // Condition rejects the NULL row
    }
  }
  qep_tab= last_inner_tab;
  /*
    From the point of view of the rest of execution, this record matches
    (it has been built and satisfies conditions, no need to do more evaluation
    on it). See similar code in evaluate_join_record().
  */
  plan_idx f_u= QEP_AT(qep_tab, first_unmatched).first_upper();
  if (f_u != NO_PLAN_IDX &&
      join->qep_tab[f_u].last_inner() != qep_tab->idx())
    f_u= NO_PLAN_IDX;
  qep_tab->first_unmatched= f_u;
  /*
    The row complemented by nulls satisfies all conditions
    attached to inner tables.
    Finish evaluation of record and send it to be joined with
    remaining tables.
    Note that evaluate_join_record will re-evaluate the condition attached
    to the last inner table of the current outer join. This is not deemed to
    have a significant performance impact.
  */
  const enum_nested_loop_state rc= evaluate_join_record(join, qep_tab);

  // Clear the NULL-row markers so subsequent scans start from clean state.
  for (QEP_TAB *tab= first_inner_tab; tab <= last_inner_tab; tab++)
    tab->table()->reset_null_row();

  DBUG_RETURN(rc);
}
1799
1800
1801 /*****************************************************************************
1802 The different ways to read a record
1803 Returns -1 if row was not found, 0 if row was found and 1 on errors
1804 *****************************************************************************/
1805
/** Helper function used when we get an error from the table handler. */
1807
report_handler_error(TABLE * table,int error)1808 int report_handler_error(TABLE *table, int error)
1809 {
1810 if (error == HA_ERR_END_OF_FILE || error == HA_ERR_KEY_NOT_FOUND)
1811 {
1812 table->status= STATUS_GARBAGE;
1813 return -1; // key not found; ok
1814 }
1815 /*
1816 Do not spam the error log with these temporary errors:
1817 LOCK_DEADLOCK LOCK_WAIT_TIMEOUT TABLE_DEF_CHANGED
1818 Also skip printing to error log if the current thread has been killed.
1819 */
1820 if (error != HA_ERR_LOCK_DEADLOCK &&
1821 error != HA_ERR_LOCK_WAIT_TIMEOUT &&
1822 error != HA_ERR_TABLE_DEF_CHANGED &&
1823 !table->in_use->killed)
1824 sql_print_error("Got error %d when reading table '%s'",
1825 error, table->s->path.str);
1826 table->file->print_error(error,MYF(0));
1827 return 1;
1828 }
1829
1830
safe_index_read(QEP_TAB * tab)1831 int safe_index_read(QEP_TAB *tab)
1832 {
1833 int error;
1834 TABLE *table= tab->table();
1835 if ((error=table->file->ha_index_read_map(table->record[0],
1836 tab->ref().key_buff,
1837 make_prev_keypart_map(tab->ref().key_parts),
1838 HA_READ_KEY_EXACT)))
1839 return report_handler_error(table, error);
1840 return 0;
1841 }
1842
1843
1844 /**
1845 Reads content of constant table
1846 @param tab table
1847 @param pos position of table in query plan
1848 @retval 0 ok, one row was found or one NULL-complemented row was created
1849 @retval -1 ok, no row was found and no NULL-complemented row was created
1850 @retval 1 error
1851 */
1852
int
join_read_const_table(JOIN_TAB *tab, POSITION *pos)
{
  int error;
  DBUG_ENTER("join_read_const_table");
  TABLE *table=tab->table();
  table->const_table= true;
  table->reset_null_row();
  table->status= STATUS_GARBAGE | STATUS_NOT_FOUND;

  if (table->reginfo.lock_type >= TL_WRITE_ALLOW_WRITE)
  {
    const enum_sql_command sql_command= tab->join()->thd->lex->sql_command;
    if (sql_command == SQLCOM_UPDATE_MULTI ||
        sql_command == SQLCOM_DELETE_MULTI)
    {
      /*
        In a multi-UPDATE, if we represent "depends on" with "->", we have:
        "what columns to read (read_set)" ->
        "whether table will be updated on-the-fly or with tmp table" ->
        "whether to-be-updated columns are used by access path"
        "access path to table (range, ref, scan...)" ->
        "query execution plan" ->
        "what tables are const" ->
        "reading const tables" ->
        "what columns to read (read_set)".
        To break this loop, we always read all columns of a constant table if
        it is going to be updated.
        Another case is in multi-UPDATE and multi-DELETE, when the table has a
        trigger: bits of columns needed by the trigger are turned on in
        result->initialize_tables(), which has not yet been called when we do
        the reading now, so we must read all columns.
      */
      bitmap_set_all(table->read_set);
      /* Virtual generated columns must be writable */
      for (Field **vfield_ptr= table->vfield; vfield_ptr && *vfield_ptr; vfield_ptr++)
        bitmap_set_bit(table->write_set, (*vfield_ptr)->field_index);
      table->file->column_bitmaps_signal();
    }
  }

  if (tab->type() == JT_SYSTEM)
    error= read_system(table);
  else
  {
    // Enable covering-index read (keyread) when the ref key covers all
    // needed columns and the lock level permits it.
    if (!table->key_read && table->covering_keys.is_set(tab->ref().key) &&
        !table->no_keyread &&
        (int) table->reginfo.lock_type <= (int) TL_READ_HIGH_PRIORITY)
    {
      table->set_keyread(TRUE);
      tab->set_index(tab->ref().key);
    }
    error= read_const(table, &tab->ref());
    table->set_keyread(FALSE);
  }

  if (error)
  {
    /* Mark for EXPLAIN that the row was not found */
    pos->filter_effect= 1.0;
    pos->rows_fetched= 0.0;
    pos->prefix_rowcount= 0.0;
    pos->ref_depend_map= 0;
    // For inner-joined tables, or a true error, propagate; an outer-joined
    // table falls through and is treated as a NULL-complemented row.
    if (!tab->table_ref->outer_join || error > 0)
      DBUG_RETURN(error);
  }

  if (tab->join_cond() && !table->has_null_row())
  {
    // We cannot handle outer-joined tables with expensive join conditions here:
    assert(!tab->join_cond()->is_expensive());
    if (tab->join_cond()->val_int() == 0)
      table->set_null_row();
  }

  /* Check appearance of new constant items in Item_equal objects */
  JOIN *const join= tab->join();
  THD *const thd= join->thd;
  if (join->where_cond &&
      update_const_equal_items(thd, join->where_cond, tab))
    DBUG_RETURN(1);
  TABLE_LIST *tbl;
  // Propagate the new constants into join conditions of all nests that
  // this table is the first leaf of.
  for (tbl= join->select_lex->leaf_tables; tbl; tbl= tbl->next_leaf)
  {
    TABLE_LIST *embedded;
    TABLE_LIST *embedding= tbl;
    do
    {
      embedded= embedding;
      if (embedded->join_cond_optim() &&
          update_const_equal_items(thd, embedded->join_cond_optim(), tab))
        DBUG_RETURN(1);
      embedding= embedded->embedding;
    }
    while (embedding &&
           embedding->nested_join->join_list.head() == embedded);
  }

  DBUG_RETURN(0);
}
1953
1954
1955 /**
1956 Read a constant table when there is at most one matching row, using a table
1957 scan.
1958
1959 @param table Table to read
1960
1961 @retval 0 Row was found
1962 @retval -1 Row was not found
1963 @retval 1 Got an error (other than row not found) during read
1964 */
read_system(TABLE * table)1965 static int read_system(TABLE *table)
1966 {
1967 int error;
1968 if (table->status & STATUS_GARBAGE) // If first read
1969 {
1970 if ((error=table->file->read_first_row(table->record[0],
1971 table->s->primary_key)))
1972 {
1973 if (error != HA_ERR_END_OF_FILE)
1974 return report_handler_error(table, error);
1975 table->set_null_row();
1976 empty_record(table); // Make empty record
1977 return -1;
1978 }
1979 store_record(table,record[1]);
1980 }
1981 else if (!table->status) // Only happens with left join
1982 restore_record(table,record[1]); // restore old record
1983 table->reset_null_row();
1984 return table->status ? -1 : 0;
1985 }
1986
1987
1988 /**
1989 Read a constant table when there is at most one matching row, using an
1990 index lookup.
1991
1992 @param tab Table to read
1993
1994 @retval 0 Row was found
1995 @retval -1 Row was not found
1996 @retval 1 Got an error (other than row not found) during read
1997 */
1998
1999 static int
join_read_const(QEP_TAB * tab)2000 join_read_const(QEP_TAB *tab)
2001 {
2002 return read_const(tab->table(), &tab->ref());
2003 }
2004
read_const(TABLE * table,TABLE_REF * ref)2005 static int read_const(TABLE *table, TABLE_REF *ref)
2006 {
2007 int error;
2008 DBUG_ENTER("read_const");
2009
2010 if (table->status & STATUS_GARBAGE) // If first read
2011 {
2012 table->status= 0;
2013 if (cp_buffer_from_ref(table->in_use, table, ref))
2014 error=HA_ERR_KEY_NOT_FOUND;
2015 else
2016 {
2017 error=table->file->ha_index_read_idx_map(table->record[0],ref->key,
2018 ref->key_buff,
2019 make_prev_keypart_map(ref->key_parts),
2020 HA_READ_KEY_EXACT);
2021 }
2022 if (error)
2023 {
2024 table->status= STATUS_NOT_FOUND;
2025 table->set_null_row();
2026 empty_record(table);
2027 if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
2028 {
2029 const int ret= report_handler_error(table, error);
2030 DBUG_RETURN(ret);
2031 }
2032 DBUG_RETURN(-1);
2033 }
2034 store_record(table,record[1]);
2035 }
2036 else if (!(table->status & ~STATUS_NULL_ROW)) // Only happens with left join
2037 {
2038 table->status=0;
2039 restore_record(table,record[1]); // restore old record
2040 }
2041 table->reset_null_row();
2042 DBUG_RETURN(table->status ? -1 : 0);
2043 }
2044
2045
2046 /**
2047 Read row using unique key: eq_ref access method implementation
2048
2049 @details
2050 This is the "read_first" function for the eq_ref access method.
2051 The difference from ref access function is that it has a one-element
2052 lookup cache (see cmp_buffer_with_ref)
2053
2054 @param tab JOIN_TAB of the accessed table
2055
2056 @retval 0 - Ok
2057 @retval -1 - Row not found
2058 @retval 1 - Error
2059 */
2060
2061 static int
join_read_key(QEP_TAB * tab)2062 join_read_key(QEP_TAB *tab)
2063 {
2064 TABLE *const table= tab->table();
2065 TABLE_REF *table_ref= &tab->ref();
2066 int error;
2067
2068 if (!table->file->inited)
2069 {
2070 /*
2071 Disable caching for inner table of outer join, since setting the NULL
2072 property on the table will overwrite NULL bits and hence destroy the
2073 current row for later use as a cached row.
2074 */
2075 if (tab->table_ref->is_inner_table_of_outer_join())
2076 table_ref->disable_cache= true;
2077 assert(!tab->use_order()); //Don't expect sort req. for single row.
2078 if ((error= table->file->ha_index_init(table_ref->key, tab->use_order())))
2079 {
2080 (void) report_handler_error(table, error);
2081 return 1;
2082 }
2083 }
2084
2085 /*
2086 We needn't do "Late NULLs Filtering" because eq_ref is restricted to
2087 indices on NOT NULL columns (see create_ref_for_key()).
2088 */
2089 if (cmp_buffer_with_ref(tab->join()->thd, table, table_ref) ||
2090 (table->status & (STATUS_GARBAGE | STATUS_NULL_ROW)))
2091 {
2092 if (table_ref->key_err)
2093 {
2094 table->status=STATUS_NOT_FOUND;
2095 return -1;
2096 }
2097 /*
2098 Moving away from the current record. Unlock the row
2099 in the handler if it did not match the partial WHERE.
2100 */
2101 if (table_ref->has_record && table_ref->use_count == 0)
2102 {
2103 table->file->unlock_row();
2104 table_ref->has_record= FALSE;
2105 }
2106 error= table->file->ha_index_read_map(table->record[0],
2107 table_ref->key_buff,
2108 make_prev_keypart_map(table_ref->key_parts),
2109 HA_READ_KEY_EXACT);
2110 if (error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
2111 return report_handler_error(table, error);
2112
2113 if (! error)
2114 {
2115 table_ref->has_record= TRUE;
2116 table_ref->use_count= 1;
2117 }
2118 }
2119 else if (table->status == 0)
2120 {
2121 assert(table_ref->has_record);
2122 table_ref->use_count++;
2123 }
2124 table->reset_null_row();
2125 return table->status ? -1 : 0;
2126 }
2127
2128 /**
2129 Since join_read_key may buffer a record, do not unlock
2130 it if it was not used in this invocation of join_read_key().
2131 Only count locks, thus remembering if the record was left unused,
2132 and unlock already when pruning the current value of
2133 TABLE_REF buffer.
2134 @sa join_read_key()
2135 */
2136
2137 void
join_read_key_unlock_row(QEP_TAB * tab)2138 join_read_key_unlock_row(QEP_TAB *tab)
2139 {
2140 assert(tab->ref().use_count);
2141 if (tab->ref().use_count)
2142 tab->ref().use_count--;
2143 }
2144
2145 /**
2146 Rows from const tables are read once but potentially used
2147 multiple times during execution of a query.
2148 Ensure such rows are never unlocked during query execution.
2149 */
2150
2151 void
join_const_unlock_row(QEP_TAB * tab)2152 join_const_unlock_row(QEP_TAB *tab)
2153 {
2154 assert(tab->type() == JT_CONST);
2155 }
2156
2157 /**
2158 Read a table *assumed* to be included in execution of a pushed join.
2159 This is the counterpart of join_read_key() / join_read_always_key()
2160 for child tables in a pushed join.
2161
2162 When the table access is performed as part of the pushed join,
2163 all 'linked' child colums are prefetched together with the parent row.
2164 The handler will then only format the row as required by MySQL and set
2165 'table->status' accordingly.
2166
2167 However, there may be situations where the prepared pushed join was not
2168 executed as assumed. It is the responsibility of the handler to handle
2169 these situation by letting ::index_read_pushed() then effectively do a
2170 plain old' index_read_map(..., HA_READ_KEY_EXACT);
2171
2172 @param tab Table to read
2173
2174 @retval
2175 0 Row was found
2176 @retval
2177 -1 Row was not found
2178 @retval
2179 1 Got an error (other than row not found) during read
2180 */
2181 static int
join_read_linked_first(QEP_TAB * tab)2182 join_read_linked_first(QEP_TAB *tab)
2183 {
2184 int error;
2185 TABLE *table= tab->table();
2186 DBUG_ENTER("join_read_linked_first");
2187
2188 assert(!tab->use_order()); // Pushed child can't be sorted
2189 if (!table->file->inited &&
2190 (error= table->file->ha_index_init(tab->ref().key, tab->use_order())))
2191 {
2192 (void) report_handler_error(table, error);
2193 DBUG_RETURN(error);
2194 }
2195
2196 /* Perform "Late NULLs Filtering" (see internals manual for explanations) */
2197 if (tab->ref().impossible_null_ref())
2198 {
2199 DBUG_PRINT("info", ("join_read_linked_first null_rejected"));
2200 DBUG_RETURN(-1);
2201 }
2202
2203 if (cp_buffer_from_ref(tab->join()->thd, table, &tab->ref()))
2204 {
2205 table->status=STATUS_NOT_FOUND;
2206 DBUG_RETURN(-1);
2207 }
2208
2209 // 'read' itself is a NOOP:
2210 // handler::index_read_pushed() only unpack the prefetched row and set 'status'
2211 error=table->file->index_read_pushed(table->record[0],
2212 tab->ref().key_buff,
2213 make_prev_keypart_map(tab->ref().key_parts));
2214 if (unlikely(error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE))
2215 DBUG_RETURN(report_handler_error(table, error));
2216
2217 table->reset_null_row();
2218 int rc= table->status ? -1 : 0;
2219 DBUG_RETURN(rc);
2220 }
2221
2222 static int
join_read_linked_next(READ_RECORD * info)2223 join_read_linked_next(READ_RECORD *info)
2224 {
2225 TABLE *table= info->table;
2226 DBUG_ENTER("join_read_linked_next");
2227
2228 int error=table->file->index_next_pushed(table->record[0]);
2229 if (error)
2230 {
2231 if (unlikely(error != HA_ERR_END_OF_FILE))
2232 DBUG_RETURN(report_handler_error(table, error));
2233 table->status= STATUS_GARBAGE;
2234 DBUG_RETURN(-1);
2235 }
2236 DBUG_RETURN(error);
2237 }
2238
2239 /*
2240 ref access method implementation: "read_first" function
2241
2242 SYNOPSIS
2243 join_read_always_key()
2244 tab JOIN_TAB of the accessed table
2245
2246 DESCRIPTION
2247 This is "read_fist" function for the "ref" access method.
2248
2249 The functon must leave the index initialized when it returns.
2250 ref_or_null access implementation depends on that.
2251
2252 RETURN
2253 0 - Ok
2254 -1 - Row not found
2255 1 - Error
2256 */
2257
2258 static int
join_read_always_key(QEP_TAB * tab)2259 join_read_always_key(QEP_TAB *tab)
2260 {
2261 int error;
2262 TABLE *table= tab->table();
2263
2264 /* Initialize the index first */
2265 if (!table->file->inited &&
2266 (error= table->file->ha_index_init(tab->ref().key, tab->use_order())))
2267 {
2268 (void) report_handler_error(table, error);
2269 return 1;
2270 }
2271
2272 /* Perform "Late NULLs Filtering" (see internals manual for explanations) */
2273 TABLE_REF *ref= &tab->ref();
2274 if (ref->impossible_null_ref())
2275 {
2276 DBUG_PRINT("info", ("join_read_always_key null_rejected"));
2277 return -1;
2278 }
2279
2280 if (cp_buffer_from_ref(tab->join()->thd, table, ref))
2281 return -1;
2282 if ((error= table->file->prepare_index_key_scan_map(tab->ref().key_buff,
2283 make_prev_keypart_map(tab->ref().key_parts))))
2284 return report_handler_error(table, error);
2285 if ((error= table->file->ha_index_read_map(table->record[0],
2286 tab->ref().key_buff,
2287 make_prev_keypart_map(tab->ref().key_parts),
2288 HA_READ_KEY_EXACT)))
2289 {
2290 if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
2291 return report_handler_error(table, error);
2292 return -1; /* purecov: inspected */
2293 }
2294 return 0;
2295 }
2296
2297
2298 /**
2299 This function is used when optimizing away ORDER BY in
2300 SELECT * FROM t1 WHERE a=1 ORDER BY a DESC,b DESC.
2301 */
2302
2303 int
join_read_last_key(QEP_TAB * tab)2304 join_read_last_key(QEP_TAB *tab)
2305 {
2306 int error;
2307 TABLE *table= tab->table();
2308
2309 if (!table->file->inited &&
2310 (error= table->file->ha_index_init(tab->ref().key, tab->use_order())))
2311 {
2312 (void) report_handler_error(table, error);
2313 return 1;
2314 }
2315 if (cp_buffer_from_ref(tab->join()->thd, table, &tab->ref()))
2316 return -1;
2317 if ((error=table->file->ha_index_read_last_map(table->record[0],
2318 tab->ref().key_buff,
2319 make_prev_keypart_map(tab->ref().key_parts))))
2320 {
2321 if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
2322 return report_handler_error(table, error);
2323 return -1; /* purecov: inspected */
2324 }
2325 return 0;
2326 }
2327
2328
2329 /* ARGSUSED */
2330 static int
join_no_more_records(READ_RECORD * info MY_ATTRIBUTE ((unused)))2331 join_no_more_records(READ_RECORD *info MY_ATTRIBUTE((unused)))
2332 {
2333 return -1;
2334 }
2335
2336
2337 static int
join_read_next_same(READ_RECORD * info)2338 join_read_next_same(READ_RECORD *info)
2339 {
2340 int error;
2341 TABLE *table= info->table;
2342 QEP_TAB *tab=table->reginfo.qep_tab;
2343
2344 if ((error= table->file->ha_index_next_same(table->record[0],
2345 tab->ref().key_buff,
2346 tab->ref().key_length)))
2347 {
2348 if (error != HA_ERR_END_OF_FILE)
2349 return report_handler_error(table, error);
2350 table->status= STATUS_GARBAGE;
2351 return -1;
2352 }
2353 return 0;
2354 }
2355
2356
2357 int
join_read_prev_same(READ_RECORD * info)2358 join_read_prev_same(READ_RECORD *info)
2359 {
2360 int error;
2361 TABLE *table= info->table;
2362 QEP_TAB *tab=table->reginfo.qep_tab;
2363
2364 /*
2365 Using ha_index_prev() for reading records from the table can cause
2366 performance issues if used in combination with ICP. The ICP code
2367 in the storage engine does not know when to stop reading from the
2368 index and a call to ha_index_prev() might cause the storage engine
2369 to read to the beginning of the index if no qualifying record is
2370 found.
2371 */
2372 assert(table->file->pushed_idx_cond == NULL);
2373
2374 if ((error= table->file->ha_index_prev(table->record[0])))
2375 return report_handler_error(table, error);
2376 if (key_cmp_if_same(table, tab->ref().key_buff, tab->ref().key,
2377 tab->ref().key_length))
2378 {
2379 table->status=STATUS_NOT_FOUND;
2380 error= -1;
2381 }
2382 return error;
2383 }
2384
2385
2386 int
join_init_quick_read_record(QEP_TAB * tab)2387 join_init_quick_read_record(QEP_TAB *tab)
2388 {
2389 /*
2390 This is for QS_DYNAMIC_RANGE, i.e., "Range checked for each
2391 record". The trace for the range analysis below this point will
2392 be printed with different ranges for every record to the left of
2393 this table in the join.
2394 */
2395
2396 THD *const thd= tab->join()->thd;
2397 #ifdef OPTIMIZER_TRACE
2398 Opt_trace_context * const trace= &thd->opt_trace;
2399 const bool disable_trace=
2400 tab->quick_traced_before &&
2401 !trace->feature_enabled(Opt_trace_context::DYNAMIC_RANGE);
2402 Opt_trace_disable_I_S disable_trace_wrapper(trace, disable_trace);
2403
2404 tab->quick_traced_before= true;
2405
2406 Opt_trace_object wrapper(trace);
2407 Opt_trace_object trace_table(trace, "rows_estimation_per_outer_row");
2408 trace_table.add_utf8_table(tab->table_ref);
2409 #endif
2410
2411 /*
2412 If this join tab was read through a QUICK for the last record
2413 combination from earlier tables, deleting that quick will close the
2414 index. Otherwise, we need to close the index before the next join
2415 iteration starts because the handler object might be reused by a different
2416 access strategy.
2417 */
2418 if (!tab->quick() &&
2419 (tab->table()->file->inited != handler::NONE))
2420 tab->table()->file->ha_index_or_rnd_end();
2421
2422 key_map needed_reg_dummy;
2423 QUICK_SELECT_I *old_qck= tab->quick();
2424 QUICK_SELECT_I *qck;
2425 DEBUG_SYNC(thd, "quick_not_created");
2426 const int rc= test_quick_select(thd,
2427 tab->keys(),
2428 0, // empty table map
2429 HA_POS_ERROR,
2430 false, // don't force quick range
2431 ORDER::ORDER_NOT_RELEVANT, tab,
2432 tab->condition(), &needed_reg_dummy, &qck,
2433 tab->table()->force_index);
2434 assert(old_qck == NULL || old_qck != qck) ;
2435 tab->set_quick(qck);
2436
2437 /*
2438 EXPLAIN CONNECTION is used to understand why a query is currently taking
2439 so much time. So it makes sense to show what the execution is doing now:
2440 is it a table scan or a range scan? A range scan on which index.
2441 So: below we want to change the type and quick visible in EXPLAIN, and for
2442 that, we need to take mutex and change type and quick_optim.
2443 */
2444
2445 DEBUG_SYNC(thd, "quick_created_before_mutex");
2446
2447 thd->lock_query_plan();
2448 tab->set_type(qck ? calc_join_type(qck->get_type()) : JT_ALL);
2449 tab->set_quick_optim();
2450 thd->unlock_query_plan();
2451
2452 delete old_qck;
2453 DEBUG_SYNC(thd, "quick_droped_after_mutex");
2454
2455 return (rc == -1) ?
2456 -1 : /* No possible records */
2457 join_init_read_record(tab);
2458 }
2459
2460
read_first_record_seq(QEP_TAB * tab)2461 int read_first_record_seq(QEP_TAB *tab)
2462 {
2463 if (tab->read_record.table->file->ha_rnd_init(1))
2464 return 1;
2465 return (*tab->read_record.read_record)(&tab->read_record);
2466 }
2467
2468
2469 /**
2470 @brief Prepare table for reading rows and read first record.
2471 @details
2472 Prior to reading the table following tasks are done, (in the order of
2473 execution):
2474 .) derived tables are materialized
2475 .) duplicates removed (tmp tables only)
2476 .) table is sorted with filesort (both non-tmp and tmp tables)
2477 After this have been done this function resets quick select, if it's
2478 present, sets up table reading functions, and reads first record.
2479
2480 @retval
2481 0 Ok
2482 @retval
2483 -1 End of records
2484 @retval
2485 1 Error
2486 */
2487
join_init_read_record(QEP_TAB * tab)2488 int join_init_read_record(QEP_TAB *tab)
2489 {
2490 int error;
2491
2492 if (tab->distinct && tab->remove_duplicates()) // Remove duplicates.
2493 return 1;
2494 if (tab->filesort && tab->sort_table()) // Sort table.
2495 return 1;
2496
2497 if (tab->quick() && (error= tab->quick()->reset()))
2498 {
2499 /* Ensures error status is propageted back to client */
2500 report_handler_error(tab->table(), error);
2501 return 1;
2502 }
2503 if (init_read_record(&tab->read_record, tab->join()->thd, NULL, tab,
2504 1, 1, FALSE))
2505 return 1;
2506
2507 return (*tab->read_record.read_record)(&tab->read_record);
2508 }
2509
2510 /*
2511 This helper function materializes derived table/view and then calls
2512 read_first_record function to set up access to the materialized table.
2513 */
2514
join_materialize_derived(QEP_TAB * tab)2515 int join_materialize_derived(QEP_TAB *tab)
2516 {
2517 THD *const thd= tab->table()->in_use;
2518 TABLE_LIST *const derived= tab->table_ref;
2519
2520 assert(derived->uses_materialization() && !tab->materialized);
2521
2522 if (derived->materializable_is_const()) // Has been materialized by optimizer
2523 return NESTED_LOOP_OK;
2524
2525 bool res= derived->materialize_derived(thd);
2526 res|= derived->cleanup_derived();
2527 DEBUG_SYNC(thd, "after_materialize_derived");
2528 return res ? NESTED_LOOP_ERROR : NESTED_LOOP_OK;
2529 }
2530
2531
2532
2533 /*
2534 Helper function for materialization of a semi-joined subquery.
2535
2536 @param tab JOIN_TAB referencing a materialized semi-join table
2537
2538 @return Nested loop state
2539 */
2540
2541 int
join_materialize_semijoin(QEP_TAB * tab)2542 join_materialize_semijoin(QEP_TAB *tab)
2543 {
2544 DBUG_ENTER("join_materialize_semijoin");
2545
2546 Semijoin_mat_exec *const sjm= tab->sj_mat_exec();
2547
2548 QEP_TAB *const first= tab->join()->qep_tab + sjm->inner_table_index;
2549 QEP_TAB *const last= first + (sjm->table_count - 1);
2550 /*
2551 Set up the end_sj_materialize function after the last inner table,
2552 so that generated rows are inserted into the materialized table.
2553 */
2554 last->next_select= end_sj_materialize;
2555 last->set_sj_mat_exec(sjm); // TODO: This violates comment for sj_mat_exec!
2556 if (tab->table()->hash_field)
2557 tab->table()->file->ha_index_init(0, 0);
2558 int rc;
2559 if ((rc= sub_select(tab->join(), first, false)) < 0)
2560 DBUG_RETURN(rc);
2561 if ((rc= sub_select(tab->join(), first, true)) < 0)
2562 DBUG_RETURN(rc);
2563 if (tab->table()->hash_field)
2564 tab->table()->file->ha_index_or_rnd_end();
2565
2566 last->next_select= NULL;
2567 last->set_sj_mat_exec(NULL);
2568
2569 #if !defined(NDEBUG) || defined(HAVE_VALGRIND)
2570 // Fields of inner tables should not be read anymore:
2571 for (QEP_TAB *t= first; t <= last; t++)
2572 {
2573 TABLE *const inner_table= t->table();
2574 TRASH(inner_table->record[0], inner_table->s->reclength);
2575 }
2576 #endif
2577
2578 DBUG_RETURN(NESTED_LOOP_OK);
2579 }
2580
2581
2582 /**
2583 Check if access to this JOIN_TAB has to retrieve rows
2584 in sorted order as defined by the ordered index
2585 used to access this table.
2586 */
2587 bool
use_order() const2588 QEP_TAB::use_order() const
2589 {
2590 /*
2591 No need to require sorted access for single row reads
2592 being performed by const- or EQ_REF-accessed tables.
2593 */
2594 if (type() == JT_EQ_REF || type() == JT_CONST || type() == JT_SYSTEM)
2595 return false;
2596
2597 /*
2598 First non-const table requires sorted results
2599 if ORDER or GROUP BY use ordered index.
2600 */
2601 if ((uint)idx() == join()->const_tables &&
2602 join()->ordered_index_usage != JOIN::ordered_index_void)
2603 return true;
2604
2605 /*
2606 LooseScan strategy for semijoin requires sorted
2607 results even if final result is not to be sorted.
2608 */
2609 if (position()->sj_strategy == SJ_OPT_LOOSE_SCAN)
2610 return true;
2611
2612 /* Fall through: Results don't have to be sorted */
2613 return false;
2614 }
2615
2616 /*
2617 Helper function for sorting table with filesort.
2618 */
2619
2620 bool
sort_table()2621 QEP_TAB::sort_table()
2622 {
2623 DBUG_PRINT("info",("Sorting for index"));
2624 THD_STAGE_INFO(join()->thd, stage_creating_sort_index);
2625 assert(join()->ordered_index_usage != (filesort->order == join()->order ?
2626 JOIN::ordered_index_order_by :
2627 JOIN::ordered_index_group_by));
2628 const bool rc= create_sort_index(join()->thd, join(), this) != 0;
2629 /*
2630 Filesort has filtered rows already (see skip_record() in
2631 find_all_keys()): so we can simply scan the cache, so have to set
2632 quick=NULL.
2633 But if we do this, we still need to delete the quick, now or later. We
2634 cannot do it now: the dtor of quick_index_merge would do free_io_cache,
2635 but the cache has to remain, because scan will read from it.
2636 So we delay deletion: we just let the "quick" continue existing in
2637 "quick_optim"; double benefit:
2638 - EXPLAIN will show the "quick_optim"
2639 - it will be deleted late enough.
2640 */
2641 set_quick(NULL);
2642 set_condition(NULL);
2643 return rc;
2644 }
2645
2646
2647 int
join_read_first(QEP_TAB * tab)2648 join_read_first(QEP_TAB *tab)
2649 {
2650 int error;
2651 TABLE *table=tab->table();
2652 if (table->covering_keys.is_set(tab->index()) && !table->no_keyread)
2653 table->set_keyread(TRUE);
2654 table->status=0;
2655 tab->read_record.table=table;
2656 tab->read_record.record=table->record[0];
2657 tab->read_record.read_record=join_read_next;
2658
2659 if (!table->file->inited &&
2660 (error= table->file->ha_index_init(tab->index(), tab->use_order())))
2661 {
2662 (void) report_handler_error(table, error);
2663 return 1;
2664 }
2665
2666 if ((error= table->file->prepare_index_scan()))
2667 {
2668 report_handler_error(table, error);
2669 return 1;
2670 }
2671
2672 if ((error= table->file->ha_index_first(tab->table()->record[0])))
2673 {
2674 if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
2675 report_handler_error(table, error);
2676 return -1;
2677 }
2678 return 0;
2679 }
2680
2681
2682 static int
join_read_next(READ_RECORD * info)2683 join_read_next(READ_RECORD *info)
2684 {
2685 int error;
2686 if ((error= info->table->file->ha_index_next(info->record)))
2687 return report_handler_error(info->table, error);
2688 return 0;
2689 }
2690
2691
2692 int
join_read_last(QEP_TAB * tab)2693 join_read_last(QEP_TAB *tab)
2694 {
2695 TABLE *table=tab->table();
2696 int error;
2697 if (table->covering_keys.is_set(tab->index()) && !table->no_keyread)
2698 table->set_keyread(TRUE);
2699 table->status=0;
2700 tab->read_record.read_record=join_read_prev;
2701 tab->read_record.table=table;
2702 tab->read_record.record=table->record[0];
2703 if (!table->file->inited &&
2704 (error= table->file->ha_index_init(tab->index(), tab->use_order())))
2705 {
2706 (void) report_handler_error(table, error);
2707 return 1;
2708 }
2709 if ((error= table->file->prepare_index_scan()))
2710 return report_handler_error(table, error);
2711 if ((error= table->file->ha_index_last(table->record[0])))
2712 return report_handler_error(table, error);
2713 return 0;
2714 }
2715
2716
2717 static int
join_read_prev(READ_RECORD * info)2718 join_read_prev(READ_RECORD *info)
2719 {
2720 int error;
2721 if ((error= info->table->file->ha_index_prev(info->record)))
2722 return report_handler_error(info->table, error);
2723 return 0;
2724 }
2725
2726
2727 static int
join_ft_read_first(QEP_TAB * tab)2728 join_ft_read_first(QEP_TAB *tab)
2729 {
2730 int error;
2731 TABLE *table= tab->table();
2732
2733 if (!table->file->inited &&
2734 (error= table->file->ha_index_init(tab->ref().key, tab->use_order())))
2735 {
2736 (void) report_handler_error(table, error);
2737 return 1;
2738 }
2739 table->file->ft_init();
2740
2741 if ((error= table->file->ft_read(table->record[0])))
2742 return report_handler_error(table, error);
2743 return 0;
2744 }
2745
2746 static int
join_ft_read_next(READ_RECORD * info)2747 join_ft_read_next(READ_RECORD *info)
2748 {
2749 int error;
2750 if ((error= info->table->file->ft_read(info->table->record[0])))
2751 return report_handler_error(info->table, error);
2752 return 0;
2753 }
2754
2755
2756 /**
2757 Reading of key with key reference and one part that may be NULL.
2758 */
2759
2760 static int
join_read_always_key_or_null(QEP_TAB * tab)2761 join_read_always_key_or_null(QEP_TAB *tab)
2762 {
2763 int res;
2764
2765 /* First read according to key which is NOT NULL */
2766 *tab->ref().null_ref_key= 0; // Clear null byte
2767 if ((res= join_read_always_key(tab)) >= 0)
2768 return res;
2769
2770 /* Then read key with null value */
2771 *tab->ref().null_ref_key= 1; // Set null byte
2772 return safe_index_read(tab);
2773 }
2774
2775
2776 static int
join_read_next_same_or_null(READ_RECORD * info)2777 join_read_next_same_or_null(READ_RECORD *info)
2778 {
2779 int error;
2780 if ((error= join_read_next_same(info)) >= 0)
2781 return error;
2782 QEP_TAB *tab= info->table->reginfo.qep_tab;
2783
2784 /* Test if we have already done a read after null key */
2785 if (*tab->ref().null_ref_key)
2786 return -1; // All keys read
2787 *tab->ref().null_ref_key= 1; // Set null byte
2788 return safe_index_read(tab); // then read null keys
2789 }
2790
2791
2792 /**
2793 Pick the appropriate access method functions
2794
2795 Sets the functions for the selected table access method
2796
2797 @param join_tab JOIN_TAB for this QEP_TAB
2798
2799 @todo join_init_read_record/join_read_(last|first) set
2800 tab->read_record.read_record internally. Do the same in other first record
2801 reading functions.
2802 */
2803
pick_table_access_method(const JOIN_TAB * join_tab)2804 void QEP_TAB::pick_table_access_method(const JOIN_TAB *join_tab)
2805 {
2806 ASSERT_BEST_REF_IN_JOIN_ORDER(join());
2807 assert(join_tab == join()->best_ref[idx()]);
2808 assert(table());
2809 assert(read_first_record == NULL);
2810 // Only some access methods support reversed access:
2811 assert(!join_tab->reversed_access || type() == JT_REF ||
2812 type() == JT_INDEX_SCAN);
2813 // Fall through to set default access functions:
2814 switch (type())
2815 {
2816 case JT_REF:
2817 if (join_tab->reversed_access)
2818 {
2819 read_first_record= join_read_last_key;
2820 read_record.read_record= join_read_prev_same;
2821 }
2822 else
2823 {
2824 read_first_record= join_read_always_key;
2825 read_record.read_record= join_read_next_same;
2826 }
2827 break;
2828
2829 case JT_REF_OR_NULL:
2830 read_first_record= join_read_always_key_or_null;
2831 read_record.read_record= join_read_next_same_or_null;
2832 break;
2833
2834 case JT_CONST:
2835 read_first_record= join_read_const;
2836 read_record.read_record= join_no_more_records;
2837 read_record.unlock_row= join_const_unlock_row;
2838 break;
2839
2840 case JT_EQ_REF:
2841 read_first_record= join_read_key;
2842 read_record.read_record= join_no_more_records;
2843 read_record.unlock_row= join_read_key_unlock_row;
2844 break;
2845
2846 case JT_FT:
2847 read_first_record= join_ft_read_first;
2848 read_record.read_record= join_ft_read_next;
2849 break;
2850
2851 case JT_INDEX_SCAN:
2852 read_first_record= join_tab->reversed_access ?
2853 join_read_last : join_read_first;
2854 break;
2855 case JT_ALL:
2856 case JT_RANGE:
2857 case JT_INDEX_MERGE:
2858 read_first_record= (join_tab->use_quick == QS_DYNAMIC_RANGE) ?
2859 join_init_quick_read_record : join_init_read_record;
2860 break;
2861 default:
2862 assert(0);
2863 break;
2864 }
2865 }
2866
2867
2868 /**
2869 Install the appropriate 'linked' access method functions
2870 if this part of the join have been converted to pushed join.
2871 */
2872
set_pushed_table_access_method(void)2873 void QEP_TAB::set_pushed_table_access_method(void)
2874 {
2875 DBUG_ENTER("set_pushed_table_access_method");
2876 assert(table());
2877
2878 /**
2879 Setup modified access function for children of pushed joins.
2880 */
2881 const TABLE *pushed_root= table()->file->root_of_pushed_join();
2882 if (pushed_root && pushed_root != table())
2883 {
2884 /**
2885 Is child of a pushed join operation:
2886 Replace access functions with its linked counterpart.
2887 ... Which is effectively a NOOP as the row is already fetched
2888 together with the root of the linked operation.
2889 */
2890 DBUG_PRINT("info", ("Modifying table access method for '%s'",
2891 table()->s->table_name.str));
2892 assert(type() != JT_REF_OR_NULL);
2893 read_first_record= join_read_linked_first;
2894 read_record.read_record= join_read_linked_next;
2895 // Use the default unlock_row function
2896 read_record.unlock_row = rr_unlock_row;
2897 }
2898 DBUG_VOID_RETURN;
2899 }
2900
2901 /*****************************************************************************
2902 DESCRIPTION
2903 Functions that end one nested loop iteration. Different functions
2904 are used to support GROUP BY clause and to redirect records
2905 to a table (e.g. in case of SELECT into a temporary table) or to the
2906 network client.
2907 See the enum_nested_loop_state enumeration for the description of return
2908 values.
2909 *****************************************************************************/
2910
2911 /* ARGSUSED */
2912 static enum_nested_loop_state
end_send(JOIN * join,QEP_TAB * qep_tab,bool end_of_records)2913 end_send(JOIN *join, QEP_TAB *qep_tab, bool end_of_records)
2914 {
2915 DBUG_ENTER("end_send");
2916 /*
2917 When all tables are const this function is called with jointab == NULL.
2918 This function shouldn't be called for the first join_tab as it needs
2919 to get fields from previous tab.
2920
2921 Note that qep_tab may be one past the last of qep_tab! So don't read its
2922 pointed content. But you can read qep_tab[-1] then.
2923 */
2924 assert(qep_tab == NULL || qep_tab > join->qep_tab);
2925 //TODO pass fields via argument
2926 List<Item> *fields= qep_tab ? qep_tab[-1].fields : join->fields;
2927
2928 if (!end_of_records)
2929 {
2930 int error;
2931
2932 if (join->tables &&
2933 // In case filesort has been used and zeroed quick():
2934 (join->qep_tab[0].quick_optim() &&
2935 join->qep_tab[0].quick_optim()->is_loose_index_scan()))
2936 {
2937 // Copy non-aggregated fields when loose index scan is used.
2938 if (copy_fields(&join->tmp_table_param, join->thd))
2939 DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
2940 }
2941 // Use JOIN's HAVING for the case of tableless SELECT.
2942 if (join->having_cond && join->having_cond->val_int() == 0)
2943 DBUG_RETURN(NESTED_LOOP_OK); // Didn't match having
2944 error=0;
2945 if (join->do_send_rows)
2946 error= join->select_lex->query_result()->send_data(*fields);
2947 if (error)
2948 DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
2949
2950 ++join->send_records;
2951 if (join->send_records >= join->unit->select_limit_cnt &&
2952 !join->do_send_rows)
2953 {
2954 /*
2955 If we have used Priority Queue for optimizing order by with limit,
2956 then stop here, there are no more records to consume.
2957 When this optimization is used, end_send is called on the next
2958 join_tab.
2959 */
2960 if (join->order &&
2961 join->calc_found_rows &&
2962 qep_tab > join->qep_tab &&
2963 qep_tab[-1].filesort &&
2964 qep_tab[-1].filesort->using_pq)
2965 {
2966 DBUG_PRINT("info", ("filesort NESTED_LOOP_QUERY_LIMIT"));
2967 DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT);
2968 }
2969 }
2970 if (join->send_records >= join->unit->select_limit_cnt &&
2971 join->do_send_rows)
2972 {
2973 if (join->calc_found_rows)
2974 {
2975 QEP_TAB *first= &join->qep_tab[0];
2976 if ((join->primary_tables == 1) &&
2977 !join->sort_and_group &&
2978 !join->send_group_parts &&
2979 !join->having_cond &&
2980 !first->condition() &&
2981 !(first->quick()) &&
2982 (first->table()->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) &&
2983 (first->ref().key < 0))
2984 {
2985 /* Join over all rows in table; Return number of found rows */
2986 TABLE *table= first->table();
2987
2988 if (table->sort.has_filesort_result())
2989 {
2990 /* Using filesort */
2991 join->send_records= table->sort.found_records;
2992 }
2993 else
2994 {
2995 table->file->info(HA_STATUS_VARIABLE);
2996 join->send_records= table->file->stats.records;
2997 }
2998 }
2999 else
3000 {
3001 join->do_send_rows= 0;
3002 if (join->unit->fake_select_lex)
3003 join->unit->fake_select_lex->select_limit= 0;
3004 DBUG_RETURN(NESTED_LOOP_OK);
3005 }
3006 }
3007 DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT); // Abort nicely
3008 }
3009 else if (join->send_records >= join->fetch_limit)
3010 {
3011 /*
3012 There is a server side cursor and all rows for
3013 this fetch request are sent.
3014 */
3015 DBUG_RETURN(NESTED_LOOP_CURSOR_LIMIT);
3016 }
3017 }
3018 DBUG_RETURN(NESTED_LOOP_OK);
3019 }
3020
3021
/* ARGSUSED */
/**
  End-point of the nested loop for grouped/aggregated queries whose result
  is sent directly to the client (no post-processing tmp table).

  Rows arrive ordered by the grouping columns; a change in the cached group
  columns (test_if_item_cache_changed()) marks a group boundary, at which
  point the finished group is checked against HAVING and sent, including
  any ROLLUP rows, and accumulation of the next group starts.

  @param join            join being executed
  @param qep_tab         QEP_TAB following the last row source, or NULL;
                         selects which field list is sent
  @param end_of_records  true on the final call, after the last input row

  @returns NESTED_LOOP_OK on success, or an error/limit state
*/
enum_nested_loop_state
end_send_group(JOIN *join, QEP_TAB *qep_tab, bool end_of_records)
{
  int idx= -1;
  enum_nested_loop_state ok_code= NESTED_LOOP_OK;
  // Fields of the preceding tab when there is one, else the JOIN's own list.
  List<Item> *fields= qep_tab ? qep_tab[-1].fields : join->fields;
  DBUG_ENTER("end_send_group");


  // Switch to the third slice of the ref item array exactly once.
  if (!join->items3.is_null() && !join->set_group_rpa)
  {
    join->set_group_rpa= true;
    join->set_items_ref_array(join->items3);
  }

  // Enter when a group ends: very first row, input exhausted, or group
  // columns changed (idx becomes the index of the first changed column).
  if (!join->first_record || end_of_records ||
      (idx=test_if_item_cache_changed(join->group_fields)) >= 0)
  {
    if (!join->group_sent &&
        (join->first_record ||
         (end_of_records && !join->grouped && !join->group_optimized_away)))
    {
      if (idx < (int) join->send_group_parts)
      {
        int error=0;
        {
          table_map save_nullinfo= 0;
          if (!join->first_record)
          {
            // No input rows at all; aggregates must still yield one row.
            /*
              If this is a subquery, we need to save and later restore
              the const table NULL info before clearing the tables
              because the following executions of the subquery do not
              reevaluate constant fields. @see save_const_null_info
              and restore_const_null_info
            */
            if (join->select_lex->master_unit()->item && join->const_tables)
              save_const_null_info(join, &save_nullinfo);

            // Calculate aggregate functions for no rows
            List_iterator_fast<Item> it(*fields);
            Item *item;

            while ((item= it++))
              item->no_rows_in_result();

            // Mark tables as containing only NULL values
            if (join->clear())
              DBUG_RETURN(NESTED_LOOP_ERROR);        /* purecov: inspected */
          }
          if (join->having_cond && join->having_cond->val_int() == 0)
            error= -1;                              // Didn't satisfy having
          else
          {
            if (join->do_send_rows)
              error= join->select_lex->query_result()->send_data(*fields);
            join->send_records++;
            join->group_sent= true;
          }
          if (join->rollup.state != ROLLUP::STATE_NONE && error <= 0)
          {
            // Emit the super-aggregate (ROLLUP) rows for this boundary.
            if (join->rollup_send_data((uint) (idx+1)))
              error= 1;
          }
          if (save_nullinfo)
            restore_const_null_info(join, save_nullinfo);

        }
        if (error > 0)
          DBUG_RETURN(NESTED_LOOP_ERROR);        /* purecov: inspected */
        if (end_of_records)
          DBUG_RETURN(NESTED_LOOP_OK);
        if (join->send_records >= join->unit->select_limit_cnt &&
            join->do_send_rows)
        {
          // LIMIT reached; with SQL_CALC_FOUND_ROWS keep counting silently.
          if (!join->calc_found_rows)
            DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT); // Abort nicely
          join->do_send_rows=0;
          join->unit->select_limit_cnt = HA_POS_ERROR;
        }
        else if (join->send_records >= join->fetch_limit)
        {
          /*
            There is a server side cursor and all rows
            for this fetch request are sent.
          */
          /*
            Preventing code duplication. When finished with the group reset
            the group functions and copy_fields. We fall through. bug #11904
          */
          ok_code= NESTED_LOOP_CURSOR_LIMIT;
        }
      }
    }
    else
    {
      if (end_of_records)
        DBUG_RETURN(NESTED_LOOP_OK);
      join->first_record=1;
      // Prime the group-column caches with the first row's values.
      (void)(test_if_item_cache_changed(join->group_fields));
    }
    if (idx < (int) join->send_group_parts)
    {
      /*
        This branch is executed also for cursors which have finished their
        fetch limit - the reason for ok_code.
      */
      // Start accumulating the new group from the current row.
      if (copy_fields(&join->tmp_table_param, join->thd))
        DBUG_RETURN(NESTED_LOOP_ERROR);
      if (init_sum_functions(join->sum_funcs, join->sum_funcs_end[idx+1]))
        DBUG_RETURN(NESTED_LOOP_ERROR);
      join->group_sent= false;
      DBUG_RETURN(ok_code);
    }
  }
  // Same group as the previous row: fold it into the aggregates.
  if (update_sum_func(join->sum_funcs))
    DBUG_RETURN(NESTED_LOOP_ERROR);
  DBUG_RETURN(NESTED_LOOP_OK);
}
3142
cmp_field_value(Field * field,my_ptrdiff_t diff)3143 static bool cmp_field_value(Field *field, my_ptrdiff_t diff)
3144 {
3145 assert(field);
3146 /*
3147 Records are different when:
3148 1) NULL flags aren't the same
3149 2) length isn't the same
3150 3) data isn't the same
3151 */
3152 const bool value1_isnull= field->is_real_null();
3153 const bool value2_isnull= field->is_real_null(diff);
3154
3155 if (value1_isnull != value2_isnull) // 1
3156 return true;
3157 if (value1_isnull)
3158 return false; // Both values are null, no need to proceed.
3159
3160 const size_t value1_length= field->data_length();
3161 const size_t value2_length= field->data_length(diff);
3162
3163 if (field->type() == MYSQL_TYPE_JSON)
3164 {
3165 Field_json *json_field= down_cast<Field_json *>(field);
3166
3167 // Fetch the JSON value on the left side of the comparison.
3168 Json_wrapper left_wrapper;
3169 if (json_field->val_json(&left_wrapper))
3170 return true; /* purecov: inspected */
3171
3172 // Fetch the JSON value on the right side of the comparison.
3173 Json_wrapper right_wrapper;
3174 json_field->ptr+= diff;
3175 bool err= json_field->val_json(&right_wrapper);
3176 json_field->ptr-= diff;
3177 if (err)
3178 return true; /* purecov: inspected */
3179
3180 return (left_wrapper.compare(right_wrapper) != 0);
3181 }
3182
3183 // Trailing space can't be skipped and length is different
3184 if (!field->is_text_key_type() && value1_length != value2_length) // 2
3185 return true;
3186
3187 if (field->cmp_max(field->ptr, field->ptr + diff, // 3
3188 std::max(value1_length, value2_length)))
3189 return true;
3190
3191 return false;
3192 }
3193
3194 /**
3195 Compare GROUP BY in from tmp table's record[0] and record[1]
3196
3197 @returns
3198 true records are different
3199 false records are the same
3200 */
3201
group_rec_cmp(ORDER * group,uchar * rec0,uchar * rec1)3202 bool group_rec_cmp(ORDER *group, uchar *rec0, uchar *rec1)
3203 {
3204 my_ptrdiff_t diff= rec1 - rec0;
3205
3206 for (ORDER *grp= group; grp; grp= grp->next)
3207 {
3208 Item *item= *(grp->item);
3209 Field *field= item->get_tmp_table_field();
3210 if (cmp_field_value(field, diff))
3211 return true;
3212 }
3213 return false;
3214 }
3215
3216
3217 /**
3218 Compare GROUP BY in from tmp table's record[0] and record[1]
3219
3220 @returns
3221 true records are different
3222 false records are the same
3223 */
3224
table_rec_cmp(TABLE * table)3225 bool table_rec_cmp(TABLE *table)
3226 {
3227 my_ptrdiff_t diff= table->record[1] - table->record[0];
3228 Field **fields= table->visible_field_ptr();
3229
3230 for (uint i= 0; i < table->visible_field_count() ; i++)
3231 {
3232 Field *field= fields[i];
3233 if (cmp_field_value(field, diff))
3234 return true;
3235 }
3236 return false;
3237 }
3238
3239
3240 /**
3241 Generate hash for a field
3242
3243 @returns generated hash
3244 */
3245
unique_hash(Field * field,ulonglong * hash_val)3246 ulonglong unique_hash(Field *field, ulonglong *hash_val)
3247 {
3248 uchar *pos, *end;
3249 ulong seed1=0, seed2= 4;
3250 ulonglong crc= *hash_val;
3251
3252 if (field->is_null())
3253 {
3254 /*
3255 Change crc in a way different from an empty string or 0.
3256 (This is an optimisation; The code will work even if
3257 this isn't done)
3258 */
3259 crc=((crc << 8) + 511+
3260 (crc >> (8*sizeof(ha_checksum)-8)));
3261 goto finish;
3262 }
3263
3264 field->get_ptr(&pos);
3265 end= pos + field->data_length();
3266
3267 if (field->type() == MYSQL_TYPE_JSON)
3268 {
3269 Field_json *json_field= down_cast<Field_json *>(field);
3270
3271 crc= json_field->make_hash_key(hash_val);
3272 }
3273 else if (field->key_type() == HA_KEYTYPE_TEXT ||
3274 field->key_type() == HA_KEYTYPE_VARTEXT1 ||
3275 field->key_type() == HA_KEYTYPE_VARTEXT2)
3276 {
3277 field->charset()->coll->hash_sort(field->charset(), (const uchar*) pos,
3278 field->data_length(), &seed1, &seed2);
3279 crc^= seed1;
3280 }
3281 else
3282 while (pos != end)
3283 crc=((crc << 8) +
3284 (((uchar) *(uchar*) pos++))) +
3285 (crc >> (8*sizeof(ha_checksum)-8));
3286 finish:
3287 *hash_val= crc;
3288 return crc;
3289 }
3290
3291
3292 /* Generate hash for unique constraint according to group-by list */
3293
unique_hash_group(ORDER * group)3294 ulonglong unique_hash_group(ORDER *group)
3295 {
3296 ulonglong crc= 0;
3297 Field *field;
3298
3299 for (ORDER *ord= group; ord ; ord= ord->next)
3300 {
3301 Item *item= *(ord->item);
3302 field= item->get_tmp_table_field();
3303 assert(field);
3304 unique_hash(field, &crc);
3305 }
3306
3307 return crc;
3308 }
3309
3310
3311 /* Generate hash for unique_constraint for all visible fields of a table */
3312
unique_hash_fields(TABLE * table)3313 ulonglong unique_hash_fields(TABLE *table)
3314 {
3315 ulonglong crc= 0;
3316 Field **fields= table->visible_field_ptr();
3317
3318 for (uint i=0 ; i < table->visible_field_count() ; i++)
3319 unique_hash(fields[i], &crc);
3320
3321 return crc;
3322 }
3323
3324
3325 /**
3326 Check unique_constraint.
3327
3328 @details Calculates record's hash and checks whether the record given in
3329 table->record[0] is already present in the tmp table.
3330
3331 @param tab JOIN_TAB of tmp table to check
3332
3333 @notes This function assumes record[0] is already filled by the caller.
3334 Depending on presence of table->group, it's or full list of table's fields
3335 are used to calculate hash.
3336
3337 @returns
3338 false same record was found
3339 true record wasn't found
3340 */
3341
check_unique_constraint(TABLE * table)3342 bool check_unique_constraint(TABLE *table)
3343 {
3344 ulonglong hash;
3345
3346 if (!table->hash_field)
3347 return true;
3348
3349 if (table->no_keyread)
3350 return true;
3351
3352 if (table->group)
3353 hash= unique_hash_group(table->group);
3354 else
3355 hash= unique_hash_fields(table);
3356 table->hash_field->store(hash, true);
3357 int res= table->file->ha_index_read_map(table->record[1],
3358 table->hash_field->ptr,
3359 HA_WHOLE_KEY,
3360 HA_READ_KEY_EXACT);
3361 while (!res)
3362 {
3363 // Check whether records are the same.
3364 if (!(table->distinct ?
3365 table_rec_cmp(table) :
3366 group_rec_cmp(table->group, table->record[0], table->record[1])))
3367 return false; // skip it
3368 res= table->file->ha_index_next_same(table->record[1],
3369 table->hash_field->ptr,
3370 sizeof(hash));
3371 }
3372 return true;
3373 }
3374
3375
/* ARGSUSED */
/**
  End-point that writes each accepted row into qep_tab's temporary table.

  Rows failing the tab's HAVING, or identical to an already-stored row per
  the hash-based unique constraint, are skipped. When the in-memory table
  fills up it is converted to an on-disk table and the write retried.
*/
static enum_nested_loop_state
end_write(JOIN *join, QEP_TAB *const qep_tab, bool end_of_records)
{
  TABLE *const table= qep_tab->table();
  DBUG_ENTER("end_write");

  if (join->thd->killed)			// Aborted by user
  {
    join->thd->send_kill_message();
    DBUG_RETURN(NESTED_LOOP_KILLED);            /* purecov: inspected */
  }
  if (!end_of_records)
  {
    Temp_table_param *const tmp_tbl= qep_tab->tmp_table_param;
    // Evaluate this row's fields and functions into the tmp-table record.
    if (copy_fields(tmp_tbl, join->thd))
      DBUG_RETURN(NESTED_LOOP_ERROR);           /* purecov: inspected */
    if (copy_funcs(tmp_tbl->items_to_copy, join->thd))
      DBUG_RETURN(NESTED_LOOP_ERROR);           /* purecov: inspected */

    if (!qep_tab->having || qep_tab->having->val_int())
    {
      int error;
      join->found_records++;

      // false means an identical row is already stored: skip the duplicate.
      if (!check_unique_constraint(table))
        goto end; // skip it

      if ((error=table->file->ha_write_row(table->record[0])))
      {
        if (table->file->is_ignorable_error(error))
          goto end;
        // Migrate the in-memory table to disk; any other error is fatal.
        if (create_ondisk_from_heap(join->thd, table,
                                    tmp_tbl->start_recinfo,
                                    &tmp_tbl->recinfo,
                                    error, TRUE, NULL))
          DBUG_RETURN(NESTED_LOOP_ERROR);        // Not a table_is_full error
        table->s->uniques=0;			// To ensure rows are the same
      }
      if (++qep_tab->send_records >=
            tmp_tbl->end_write_records &&
          join->do_send_rows)
      {
        // Enough rows collected; with SQL_CALC_FOUND_ROWS keep counting
        // rows but stop producing them.
        if (!join->calc_found_rows)
          DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT);
        join->do_send_rows=0;
        join->unit->select_limit_cnt = HA_POS_ERROR;
        DBUG_RETURN(NESTED_LOOP_OK);
      }
    }
  }
end:
  DBUG_RETURN(NESTED_LOOP_OK);
}
3430
3431
/* ARGSUSED */
/** Group by searching after group record and updating it if possible. */

/*
  Unlike end_send_group()/end_write_group(), this end-point does not rely
  on rows arriving ordered by the group columns: it looks each row's group
  up in the tmp table (by hash field or by index on the group key) and
  either updates the existing group row's aggregates or inserts a new one.
*/
static enum_nested_loop_state
end_update(JOIN *join, QEP_TAB *const qep_tab, bool end_of_records)
{
  TABLE *const table= qep_tab->table();
  ORDER *group;
  int error;
  bool group_found= false;
  DBUG_ENTER("end_update");

  if (end_of_records)
    DBUG_RETURN(NESTED_LOOP_OK);
  if (join->thd->killed)			// Aborted by user
  {
    join->thd->send_kill_message();
    DBUG_RETURN(NESTED_LOOP_KILLED);            /* purecov: inspected */
  }

  Temp_table_param *const tmp_tbl= qep_tab->tmp_table_param;
  join->found_records++;
  if (copy_fields(tmp_tbl, join->thd))		// Groups are copied twice.
    DBUG_RETURN(NESTED_LOOP_ERROR);             /* purecov: inspected */

  /* Make a key of group index */
  if (table->hash_field)
  {
    /*
      We need to call to copy_funcs here in order to get correct value for
      hash_field. However, this call isn't needed so early when hash_field
      isn't used as it would cause unnecessary additional evaluation of
      functions to be copied when 2nd and further records in group are
      found.
    */
    if (copy_funcs(tmp_tbl->items_to_copy, join->thd))
      DBUG_RETURN(NESTED_LOOP_ERROR);           /* purecov: inspected */
    // check_unique_constraint() returns false when the group already exists.
    if (!check_unique_constraint(table))
      group_found= true;
  }
  else
  {
    // Build the index lookup key from the group expressions.
    for (group=table->group ; group ; group=group->next)
    {
      Item *item= *group->item;
      item->save_org_in_field(group->field);
      /* Store in the used key if the field was 0 */
      if (item->maybe_null)
        group->buff[-1]= (char) group->field->is_null();
    }
    const uchar *key= tmp_tbl->group_buff;
    if (!table->file->ha_index_read_map(table->record[1],
                                        key,
                                        HA_WHOLE_KEY,
                                        HA_READ_KEY_EXACT))
      group_found= true;
  }
  if (group_found)
  {
    /* Update old record */
    restore_record(table, record[1]);
    update_tmptable_sum_func(join->sum_funcs, table);
    if ((error=table->file->ha_update_row(table->record[1],
                                          table->record[0])))
    {
      // Old and new records are the same, ok to ignore
      if (error == HA_ERR_RECORD_IS_THE_SAME)
        DBUG_RETURN(NESTED_LOOP_OK);
      table->file->print_error(error, MYF(0));   /* purecov: inspected */
      DBUG_RETURN(NESTED_LOOP_ERROR);            /* purecov: inspected */
    }
    DBUG_RETURN(NESTED_LOOP_OK);
  }

  /*
    Copy null bits from group key to table
    We can't copy all data as the key may have different format
    as the row data (for example as with VARCHAR keys)
  */
  if (!table->hash_field)
  {
    KEY_PART_INFO *key_part;
    for (group= table->group, key_part= table->key_info[0].key_part;
         group;
         group= group->next, key_part++)
    {
      if (key_part->null_bit)
        memcpy(table->record[0] + key_part->offset, group->buff, 1);
    }
    /* See comment on copy_funcs above. */
    if (copy_funcs(tmp_tbl->items_to_copy, join->thd))
      DBUG_RETURN(NESTED_LOOP_ERROR);           /* purecov: inspected */
  }
  // First row of a previously unseen group: insert it with freshly
  // initialized aggregate values.
  init_tmptable_sum_functions(join->sum_funcs);
  if ((error=table->file->ha_write_row(table->record[0])))
  {
    /*
      If the error is HA_ERR_FOUND_DUPP_KEY and the grouping involves a
      TIMESTAMP field, throw a meaningfull error to user with the actual
      reason and the workaround. I.e, "Grouping on temporal is
      non-deterministic for timezones having DST. Please consider switching
      to UTC for this query". This is a temporary measure until we implement
      WL#13148 (Do all internal handling TIMESTAMP in UTC timezone), which
      will make such problem impossible.
    */
    if (error == HA_ERR_FOUND_DUPP_KEY)
    {
      for (group=table->group ; group ; group=group->next)
      {
        if (group->field->type() == MYSQL_TYPE_TIMESTAMP)
        {
          my_error(ER_GROUPING_ON_TIMESTAMP_IN_DST, MYF(0));
          DBUG_RETURN(NESTED_LOOP_ERROR);
        }
      }
    }
    // "Table is full": convert the in-memory tmp table to on-disk.
    if (create_ondisk_from_heap(join->thd, table,
                                tmp_tbl->start_recinfo,
                                &tmp_tbl->recinfo,
                                error, FALSE, NULL))
      DBUG_RETURN(NESTED_LOOP_ERROR);            // Not a table_is_full error
    /* Change method to update rows */
    if ((error= table->file->ha_index_init(0, 0)))
    {
      table->file->print_error(error, MYF(0));
      DBUG_RETURN(NESTED_LOOP_ERROR);
    }
  }
  qep_tab->send_records++;
  DBUG_RETURN(NESTED_LOOP_OK);
}
3563
3564
/* ARGSUSED */
/**
  End-point that writes grouped/aggregated rows into a temporary table.

  Like end_send_group(), relies on rows arriving ordered by the group
  columns; on each group boundary the finished group (and its ROLLUP rows,
  if any) is written to qep_tab's table after checking HAVING.
*/
enum_nested_loop_state
end_write_group(JOIN *join, QEP_TAB *const qep_tab, bool end_of_records)
{
  TABLE *table= qep_tab->table();
  int idx= -1;
  DBUG_ENTER("end_write_group");

  if (join->thd->killed)
  {						// Aborted by user
    join->thd->send_kill_message();
    DBUG_RETURN(NESTED_LOOP_KILLED);            /* purecov: inspected */
  }
  // Group boundary: first row ever, end of input, or group columns changed.
  if (!join->first_record || end_of_records ||
      (idx=test_if_item_cache_changed(join->group_fields)) >= 0)
  {
    Temp_table_param *const tmp_tbl= qep_tab->tmp_table_param;
    if (join->first_record || (end_of_records && !join->grouped))
    {
      int send_group_parts= join->send_group_parts;
      if (idx < send_group_parts)
      {
        table_map save_nullinfo= 0;
        if (!join->first_record)
        {
          // Dead code or we need a test case for this branch
          assert(false);
          /*
            If this is a subquery, we need to save and later restore
            the const table NULL info before clearing the tables
            because the following executions of the subquery do not
            reevaluate constant fields. @see save_const_null_info
            and restore_const_null_info
          */
          if (join->select_lex->master_unit()->item && join->const_tables)
            save_const_null_info(join, &save_nullinfo);

          // Calculate aggregate functions for no rows
          List_iterator_fast<Item> it(*(qep_tab-1)->fields);
          Item *item;
          while ((item= it++))
            item->no_rows_in_result();

          // Mark tables as containing only NULL values
          if (join->clear())
            DBUG_RETURN(NESTED_LOOP_ERROR);
        }
        // Materialize the finished group's aggregates into the record.
        copy_sum_funcs(join->sum_funcs,
                       join->sum_funcs_end[send_group_parts]);
        if (!qep_tab->having || qep_tab->having->val_int())
        {
          int error= table->file->ha_write_row(table->record[0]);
          // On "table is full", spill the in-memory table to disk.
          if (error &&
              create_ondisk_from_heap(join->thd, table,
                                      tmp_tbl->start_recinfo,
                                      &tmp_tbl->recinfo,
                                      error, FALSE, NULL))
            DBUG_RETURN(NESTED_LOOP_ERROR);
        }
        if (join->rollup.state != ROLLUP::STATE_NONE)
        {
          // Write the super-aggregate (ROLLUP) rows for this boundary.
          if (join->rollup_write_data((uint) (idx+1), table))
            DBUG_RETURN(NESTED_LOOP_ERROR);
        }
        if (save_nullinfo)
          restore_const_null_info(join, save_nullinfo);

        if (end_of_records)
          DBUG_RETURN(NESTED_LOOP_OK);
      }
    }
    else
    {
      if (end_of_records)
        DBUG_RETURN(NESTED_LOOP_OK);
      join->first_record=1;

      // Prime the group-column caches with the first row's values.
      (void)(test_if_item_cache_changed(join->group_fields));
    }
    if (idx < (int) join->send_group_parts)
    {
      // Start accumulating the new group from the current row.
      if (copy_fields(tmp_tbl, join->thd))
        DBUG_RETURN(NESTED_LOOP_ERROR);
      if (copy_funcs(tmp_tbl->items_to_copy, join->thd))
        DBUG_RETURN(NESTED_LOOP_ERROR);
      if (init_sum_functions(join->sum_funcs, join->sum_funcs_end[idx+1]))
        DBUG_RETURN(NESTED_LOOP_ERROR);
      DBUG_RETURN(NESTED_LOOP_OK);
    }
  }
  // Row belongs to the current group: fold it into the aggregates.
  if (update_sum_func(join->sum_funcs))
    DBUG_RETURN(NESTED_LOOP_ERROR);
  DBUG_RETURN(NESTED_LOOP_OK);
}
3659
3660
3661 /*
3662 If not selecting by given key, create an index how records should be read
3663
3664 SYNOPSIS
3665 create_sort_index()
3666 thd Thread handler
3667 join Join with table to sort
3668 order How table should be sorted
3669 filesort_limit Max number of rows that needs to be sorted
3670 select_limit Max number of rows in final output
3671 Used to decide if we should use index or not
3672 IMPLEMENTATION
3673 - If there is an index that can be used, the first non-const join_tab in
3674 'join' is modified to use this index.
3675 - If no index, create with filesort() an index file that can be used to
3676 retrieve rows in order (should be done with 'read_record').
3677 The sorted data is stored in tab->table() and will be freed when calling
3678 free_io_cache(tab->table()).
3679
3680 RETURN VALUES
3681 0 ok
3682 -1 Some fatal error
3683 1 No records
3684 */
3685
static int
create_sort_index(THD *thd, JOIN *join, QEP_TAB *tab)
{
  ha_rows examined_rows, found_rows, returned_rows;
  TABLE *table;
  bool status;
  Filesort *fsort= tab->filesort;
  DBUG_ENTER("create_sort_index");

  // One row, no need to sort. make_tmp_tables_info should already handle this.
  assert(!join->plan_is_const() && fsort);
  table= tab->table();

  // Allocate the IO_CACHE used to hold the sorted result / row references.
  table->sort.io_cache=(IO_CACHE*) my_malloc(key_memory_TABLE_sort_io_cache,
                                             sizeof(IO_CACHE),
                                             MYF(MY_WME | MY_ZEROFILL));
  table->status=0;				// May be wrong if quick_select

  // If table has a range, move it to select
  if (tab->quick() && tab->ref().key >= 0)
  {
    if (tab->type() != JT_REF_OR_NULL && tab->type() != JT_FT)
    {
      assert(tab->type() == JT_REF || tab->type() == JT_EQ_REF);
      // Update ref value
      if ((cp_buffer_from_ref(thd, table, &tab->ref()) && thd->is_fatal_error))
        goto err;                                   // out of memory
    }
  }

  /* Fill schema tables with data before filesort if it's necessary */
  if ((join->select_lex->active_options() & OPTION_SCHEMA_TABLE) &&
      get_schema_tables_result(join, PROCESSED_BY_CREATE_SORT_INDEX))
    goto err;

  if (table->s->tmp_table)
    table->file->info(HA_STATUS_VARIABLE);	// Get record count
  // Run the actual sort; results are placed in table->sort.
  status= filesort(thd, fsort, tab->keep_current_rowid,
                   &examined_rows, &found_rows, &returned_rows);
  table->sort.found_records= returned_rows;
  tab->set_records(found_rows);                     // For SQL_CALC_ROWS
  tab->join()->examined_rows+=examined_rows;
  table->set_keyread(FALSE); // Restore if we used indexes
  // Close the scan used to feed filesort.
  if (tab->type() == JT_FT)
    table->file->ft_end();
  else
    table->file->ha_index_or_rnd_end();
  DBUG_RETURN(status);
err:
  DBUG_RETURN(-1);
}
3737
3738
3739 /*****************************************************************************
3740 Remove duplicates from tmp table
3741 This should be recoded to add a unique index to the table and remove
3742 duplicates
3743 Table is a locked single thread table
3744 fields is the number of fields to check (from the end)
3745 *****************************************************************************/
3746
compare_record(TABLE * table,Field ** ptr)3747 static bool compare_record(TABLE *table, Field **ptr)
3748 {
3749 for (; *ptr ; ptr++)
3750 {
3751 if ((*ptr)->cmp_offset(table->s->rec_buff_length))
3752 return 1;
3753 }
3754 return 0;
3755 }
3756
copy_blobs(Field ** ptr)3757 static bool copy_blobs(Field **ptr)
3758 {
3759 for (; *ptr ; ptr++)
3760 {
3761 if ((*ptr)->flags & BLOB_FLAG)
3762 if (((Field_blob *) (*ptr))->copy())
3763 return 1; // Error
3764 }
3765 return 0;
3766 }
3767
free_blobs(Field ** ptr)3768 static void free_blobs(Field **ptr)
3769 {
3770 for (; *ptr ; ptr++)
3771 {
3772 if ((*ptr)->flags & BLOB_FLAG)
3773 ((Field_blob *) (*ptr))->mem_free();
3774 }
3775 }
3776
3777
/**
  Remove duplicate rows from this tab's temporary table.

  The field list of the materialized result lives in the previous tab.
  Chooses between a hash-based pass (when all keys fit in the sort buffer,
  or the table is a HEAP table) and a quadratic compare pass.

  @returns true on error, false on success
*/
bool
QEP_TAB::remove_duplicates()
{
  bool error;
  ulong reclength,offset;
  uint field_count;
  List<Item> *field_list= (this-1)->fields;
  DBUG_ENTER("remove_duplicates");

  assert(join()->tmp_tables > 0 && table()->s->tmp_table != NO_TMP_TABLE);
  THD_STAGE_INFO(join()->thd, stage_removing_duplicates);

  TABLE *const tbl= table();

  tbl->reginfo.lock_type=TL_WRITE;

  /* Calculate how many saved fields there is in list */
  field_count=0;
  List_iterator<Item> it(*field_list);
  Item *item;
  while ((item=it++))
  {
    if (item->get_tmp_table_field() && ! item->const_item())
      field_count++;
  }

  if (!field_count &&
      !join()->calc_found_rows &&
      !having)
  {                    // only const items with no OPTION_FOUND_ROWS
    // All rows are identical; keeping one is enough.
    join()->unit->select_limit_cnt= 1;		// Only send first row
    DBUG_RETURN(false);
  }
  // The duplicate check covers the trailing field_count non-const fields;
  // offset is where those fields start inside the record.
  Field **first_field= tbl->field+ tbl->s->fields - field_count;
  offset= (field_count ?
           tbl->field[tbl->s->fields - field_count]->
           offset(tbl->record[0]) : 0);
  reclength= tbl->s->reclength-offset;

  free_io_cache(tbl);				// Safety
  tbl->file->info(HA_STATUS_VARIABLE);
  // Hash method when the key set fits in sortbuff_size (or HEAP table,
  // which has no blobs); otherwise pairwise compare.
  if (tbl->s->db_type() == heap_hton ||
      (!tbl->s->blob_fields &&
       ((ALIGN_SIZE(reclength) + HASH_OVERHEAD) * tbl->file->stats.records <
        join()->thd->variables.sortbuff_size)))
    error=remove_dup_with_hash_index(join()->thd, tbl,
                                     field_count, first_field,
                                     reclength, having);
  else
    error=remove_dup_with_compare(join()->thd, tbl, first_field, offset,
                                  having);

  free_blobs(first_field);
  DBUG_RETURN(error);
}
3833
3834
/**
  Remove duplicates with repeated table scans (quadratic compare).

  For each retained row, scans the rest of the table and deletes every row
  whose compared fields are identical; rows failing HAVING are deleted too.

  @returns true on error, false on success
*/
static bool remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field,
                                    ulong offset, Item *having)
{
  handler *file=table->file;
  char *org_record,*new_record;
  uchar *record;
  int error;
  ulong reclength= table->s->reclength-offset;
  DBUG_ENTER("remove_dup_with_compare");

  // Compared parts of record[0] (current row) and record[1] (reference row).
  org_record=(char*) (record=table->record[0])+offset;
  new_record=(char*) table->record[1]+offset;

  if ((error= file->ha_rnd_init(1)))
    goto err;
  error=file->ha_rnd_next(record);
  for (;;)
  {
    if (thd->killed)
    {
      thd->send_kill_message();
      error=0;
      goto err;
    }
    if (error)
    {
      if (error == HA_ERR_RECORD_DELETED)
      {
        error= file->ha_rnd_next(record);
        continue;
      }
      if (error == HA_ERR_END_OF_FILE)
        break;
      goto err;
    }
    if (having && !having->val_int())
    {
      // Row fails HAVING: delete it and move to the next candidate.
      if ((error=file->ha_delete_row(record)))
        goto err;
      error=file->ha_rnd_next(record);
      continue;
    }
    // Blob data must be copied out before record[0] is overwritten by
    // the inner scan.
    if (copy_blobs(first_field))
    {
      error=0;
      goto err;
    }
    // Save the reference row into record[1] for comparison.
    memcpy(new_record,org_record,reclength);

    /* Read through rest of file and mark duplicated rows deleted */
    bool found=0;
    for (;;)
    {
      if ((error=file->ha_rnd_next(record)))
      {
        if (error == HA_ERR_RECORD_DELETED)
          continue;
        if (error == HA_ERR_END_OF_FILE)
          break;
        goto err;
      }
      if (compare_record(table, first_field) == 0)
      {
        if ((error=file->ha_delete_row(record)))
          goto err;
      }
      else if (!found)
      {
        // First non-duplicate row: the outer loop restarts from here.
        found=1;
        file->position(record);	// Remember position
      }
    }
    if (!found)
      break;					// End of file
    /* Restart search on next row */
    error=file->ha_rnd_pos(record, file->ref);
  }

  file->extra(HA_EXTRA_NO_CACHE);
  DBUG_RETURN(false);
err:
  file->extra(HA_EXTRA_NO_CACHE);
  if (file->inited)
    (void) file->ha_rnd_end();
  if (error)
    file->print_error(error,MYF(0));
  DBUG_RETURN(true);
}
3923
3924
3925 /**
3926 Generate a hash index for each row to quickly find duplicate rows.
3927
3928 @note
3929 Note that this will not work on tables with blobs!
3930 */
3931
static bool remove_dup_with_hash_index(THD *thd, TABLE *table,
                                       uint field_count,
                                       Field **first_field,
                                       ulong key_length,
                                       Item *having)
{
  uchar *key_buffer, *key_pos, *record=table->record[0];
  int error;
  handler *file= table->file;
  ulong extra_length= ALIGN_SIZE(key_length)-key_length;
  uint *field_lengths,*field_length;
  HASH hash;
  DBUG_ENTER("remove_dup_with_hash_index");

  // One aligned key slot per expected row (stats.records), plus an array
  // of per-field sort lengths; both freed together via key_buffer.
  if (!my_multi_malloc(key_memory_hash_index_key_buffer,
                       MYF(MY_WME),
                       &key_buffer,
                       (uint) ((key_length + extra_length) *
                               (long) file->stats.records),
                       &field_lengths,
                       (uint) (field_count*sizeof(*field_lengths)),
                       NullS))
    DBUG_RETURN(true);

  {
    // Recompute the true key length from the fields' sort lengths; it may
    // be shorter than the caller's record-based estimate.
    Field **ptr;
    ulong total_length= 0;
    for (ptr= first_field, field_length=field_lengths ; *ptr ; ptr++)
    {
      uint length= (*ptr)->sort_length();
      (*field_length++)= length;
      total_length+= length;
    }
    DBUG_PRINT("info",("field_count: %u key_length: %lu total_length: %lu",
                       field_count, key_length, total_length));
    assert(total_length <= key_length);
    key_length= total_length;
    extra_length= ALIGN_SIZE(key_length)-key_length;
  }

  if (my_hash_init(&hash, &my_charset_bin, (uint) file->stats.records, 0,
                   key_length, (my_hash_get_key) 0, 0, 0,
                   key_memory_hash_index_key_buffer))
  {
    my_free(key_buffer);
    DBUG_RETURN(true);
  }

  if ((error= file->ha_rnd_init(1)))
    goto err;
  key_pos=key_buffer;
  for (;;)
  {
    uchar *org_key_pos;
    if (thd->killed)
    {
      thd->send_kill_message();
      error=0;
      goto err;
    }
    if ((error=file->ha_rnd_next(record)))
    {
      if (error == HA_ERR_RECORD_DELETED)
        continue;
      if (error == HA_ERR_END_OF_FILE)
        break;
      goto err;
    }
    if (having && !having->val_int())
    {
      // Row fails HAVING: delete it rather than consider it for dedup.
      if ((error=file->ha_delete_row(record)))
        goto err;
      continue;
    }

    /* copy fields to key buffer */
    org_key_pos= key_pos;
    field_length=field_lengths;
    for (Field **ptr= first_field ; *ptr ; ptr++)
    {
      (*ptr)->make_sort_key(key_pos,*field_length);
      key_pos+= *field_length++;
    }
    /* Check if it exists before */
    if (my_hash_search(&hash, org_key_pos, key_length))
    {
      /* Duplicated found ; Remove the row */
      if ((error=file->ha_delete_row(record)))
        goto err;
    }
    else
    {
      // New key: the hash stores a pointer into key_buffer, so the slot
      // must be kept (advance past the alignment padding below).
      if (my_hash_insert(&hash, org_key_pos))
        goto err;
    }
    key_pos+=extra_length;
  }
  my_free(key_buffer);
  my_hash_free(&hash);
  file->extra(HA_EXTRA_NO_CACHE);
  (void) file->ha_rnd_end();
  DBUG_RETURN(false);

err:
  my_free(key_buffer);
  my_hash_free(&hash);
  file->extra(HA_EXTRA_NO_CACHE);
  if (file->inited)
    (void) file->ha_rnd_end();
  if (error)
    file->print_error(error,MYF(0));
  DBUG_RETURN(true);
}
4045
4046
4047 /*
4048 eq_ref: Create the lookup key and check if it is the same as saved key
4049
4050 SYNOPSIS
4051 cmp_buffer_with_ref()
4052 tab Join tab of the accessed table
4053 table The table to read. This is usually tab->table(), except for
4054 semi-join when we might need to make a lookup in a temptable
4055 instead.
4056 tab_ref The structure with methods to collect index lookup tuple.
4057 This is usually table->ref, except for the case of when we're
4058 doing lookup into semi-join materialization table.
4059
4060 DESCRIPTION
4061 Used by eq_ref access method: create the index lookup key and check if
4062 we've used this key at previous lookup (If yes, we don't need to repeat
4063 the lookup - the record has been already fetched)
4064
4065 RETURN
4066 TRUE No cached record for the key, or failed to create the key (due to
4067 out-of-domain error)
4068 FALSE The created key is the same as the previous one (and the record
4069 is already in table->record)
4070 */
4071
4072 static bool
cmp_buffer_with_ref(THD * thd,TABLE * table,TABLE_REF * tab_ref)4073 cmp_buffer_with_ref(THD *thd, TABLE *table, TABLE_REF *tab_ref)
4074 {
4075 bool no_prev_key;
4076 if (!tab_ref->disable_cache)
4077 {
4078 if (!(no_prev_key= tab_ref->key_err))
4079 {
4080 /* Previous access found a row. Copy its key */
4081 memcpy(tab_ref->key_buff2, tab_ref->key_buff, tab_ref->key_length);
4082 }
4083 }
4084 else
4085 no_prev_key= TRUE;
4086 if ((tab_ref->key_err= cp_buffer_from_ref(thd, table, tab_ref)) ||
4087 no_prev_key)
4088 return 1;
4089 return memcmp(tab_ref->key_buff2, tab_ref->key_buff, tab_ref->key_length)
4090 != 0;
4091 }
4092
4093
4094 bool
cp_buffer_from_ref(THD * thd,TABLE * table,TABLE_REF * ref)4095 cp_buffer_from_ref(THD *thd, TABLE *table, TABLE_REF *ref)
4096 {
4097 enum enum_check_fields save_count_cuted_fields= thd->count_cuted_fields;
4098 thd->count_cuted_fields= CHECK_FIELD_IGNORE;
4099 my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->write_set);
4100 bool result= 0;
4101
4102 for (uint part_no= 0; part_no < ref->key_parts; part_no++)
4103 {
4104 store_key *s_key= ref->key_copy[part_no];
4105 if (!s_key)
4106 continue;
4107
4108 if (s_key->copy() & 1)
4109 {
4110 result= 1;
4111 break;
4112 }
4113 }
4114 thd->count_cuted_fields= save_count_cuted_fields;
4115 dbug_tmp_restore_column_map(table->write_set, old_map);
4116 return result;
4117 }
4118
4119
4120 /**
4121 allocate group fields or take prepared (cached).
4122
4123 @param main_join join of current select
4124 @param curr_join current join (join of current select or temporary copy
4125 of it)
4126
4127 @retval
4128 0 ok
4129 @retval
4130 1 failed
4131 */
4132
4133 bool
make_group_fields(JOIN * main_join,JOIN * curr_join)4134 make_group_fields(JOIN *main_join, JOIN *curr_join)
4135 {
4136 if (main_join->group_fields_cache.elements)
4137 {
4138 curr_join->group_fields= main_join->group_fields_cache;
4139 curr_join->sort_and_group= 1;
4140 }
4141 else
4142 {
4143 if (alloc_group_fields(curr_join, curr_join->group_list))
4144 return (1);
4145 main_join->group_fields_cache= curr_join->group_fields;
4146 }
4147 return (0);
4148 }
4149
4150
4151 /**
4152 Get a list of buffers for saveing last group.
4153
4154 Groups are saved in reverse order for easyer check loop.
4155 */
4156
4157 bool
alloc_group_fields(JOIN * join,ORDER * group)4158 alloc_group_fields(JOIN *join, ORDER *group)
4159 {
4160 if (group)
4161 {
4162 for (; group ; group=group->next)
4163 {
4164 Cached_item *tmp=new_Cached_item(join->thd, *group->item, FALSE);
4165 if (!tmp || join->group_fields.push_front(tmp))
4166 return TRUE;
4167 }
4168 }
4169 join->sort_and_group=1; /* Mark for do_select */
4170 return FALSE;
4171 }
4172
4173
4174 /*
4175 Test if a single-row cache of items changed, and update the cache.
4176
4177 @details Test if a list of items that typically represents a result
4178 row has changed. If the value of some item changed, update the cached
4179 value for this item.
4180
4181 @param list list of <item, cached_value> pairs stored as Cached_item.
4182
4183 @return -1 if no item changed
4184 @return index of the first item that changed
4185 */
4186
test_if_item_cache_changed(List<Cached_item> & list)4187 int test_if_item_cache_changed(List<Cached_item> &list)
4188 {
4189 DBUG_ENTER("test_if_item_cache_changed");
4190 List_iterator<Cached_item> li(list);
4191 int idx= -1,i;
4192 Cached_item *buff;
4193
4194 for (i=(int) list.elements-1 ; (buff=li++) ; i--)
4195 {
4196 if (buff->cmp())
4197 idx=i;
4198 }
4199 DBUG_PRINT("info", ("idx: %d", idx));
4200 DBUG_RETURN(idx);
4201 }
4202
4203
4204 /**
4205 Setup copy_fields to save fields at start of new group.
4206
4207 Setup copy_fields to save fields at start of new group
4208
4209 Only FIELD_ITEM:s and FUNC_ITEM:s needs to be saved between groups.
4210 Change old item_field to use a new field with points at saved fieldvalue
4211 This function is only called before use of send_result_set_metadata.
4212
4213 @param thd THD pointer
4214 @param param temporary table parameters
4215 @param ref_pointer_array array of pointers to top elements of filed list
4216 @param res_selected_fields new list of items of select item list
4217 @param res_all_fields new list of all items
4218 @param elements number of elements in select item list
4219 @param all_fields all fields list
4220
4221 @todo
4222 In most cases this result will be sent to the user.
4223 This should be changed to use copy_int or copy_real depending
4224 on how the value is to be used: In some cases this may be an
4225 argument in a group function, like: IF(ISNULL(col),0,COUNT(*))
4226
4227 @retval
4228 0 ok
4229 @retval
4230 !=0 error
4231 */
4232
4233 bool
setup_copy_fields(THD * thd,Temp_table_param * param,Ref_ptr_array ref_pointer_array,List<Item> & res_selected_fields,List<Item> & res_all_fields,uint elements,List<Item> & all_fields)4234 setup_copy_fields(THD *thd, Temp_table_param *param,
4235 Ref_ptr_array ref_pointer_array,
4236 List<Item> &res_selected_fields, List<Item> &res_all_fields,
4237 uint elements, List<Item> &all_fields)
4238 {
4239 Item *pos;
4240 List_iterator_fast<Item> li(all_fields);
4241 Copy_field *copy= NULL;
4242 Copy_field *copy_start MY_ATTRIBUTE((unused));
4243 res_selected_fields.empty();
4244 res_all_fields.empty();
4245 List_iterator_fast<Item> itr(res_all_fields);
4246 List<Item> extra_funcs;
4247 uint i, border= all_fields.elements - elements;
4248 DBUG_ENTER("setup_copy_fields");
4249
4250 if (param->field_count &&
4251 !(copy=param->copy_field= new Copy_field[param->field_count]))
4252 goto err2;
4253
4254 param->copy_funcs.empty();
4255 copy_start= copy;
4256 for (i= 0; (pos= li++); i++)
4257 {
4258 Field *field;
4259 uchar *tmp;
4260 Item *real_pos= pos->real_item();
4261 /*
4262 Aggregate functions can be substituted for fields (by e.g. temp tables).
4263 We need to filter those substituted fields out.
4264 */
4265 if (real_pos->type() == Item::FIELD_ITEM &&
4266 !(real_pos != pos &&
4267 ((Item_ref *)pos)->ref_type() == Item_ref::AGGREGATE_REF))
4268 {
4269 Item_field *item;
4270 if (!(item= new Item_field(thd, ((Item_field*) real_pos))))
4271 goto err;
4272 if (pos->type() == Item::REF_ITEM)
4273 {
4274 /* preserve the names of the ref when dereferncing */
4275 Item_ref *ref= (Item_ref *) pos;
4276 item->db_name= ref->db_name;
4277 item->table_name= ref->table_name;
4278 item->item_name= ref->item_name;
4279 }
4280 pos= item;
4281 if (item->field->flags & BLOB_FLAG)
4282 {
4283 if (!(pos= Item_copy::create(pos)))
4284 goto err;
4285 /*
4286 Item_copy_string::copy for function can call
4287 Item_copy_string::val_int for blob via Item_ref.
4288 But if Item_copy_string::copy for blob isn't called before,
4289 it's value will be wrong
4290 so let's insert Item_copy_string for blobs in the beginning of
4291 copy_funcs
4292 (to see full test case look at having.test, BUG #4358)
4293 */
4294 if (param->copy_funcs.push_front(pos))
4295 goto err;
4296 }
4297 else
4298 {
4299 /*
4300 set up save buffer and change result_field to point at
4301 saved value
4302 */
4303 field= item->field;
4304 item->result_field=field->new_field(thd->mem_root,field->table, 1);
4305 /*
4306 We need to allocate one extra byte for null handling.
4307 */
4308 if (!(tmp= static_cast<uchar*>(sql_alloc(field->pack_length() + 1))))
4309 goto err;
4310 if (copy)
4311 {
4312 assert (param->field_count > (uint) (copy - copy_start));
4313 copy->set(tmp, item->result_field);
4314 item->result_field->move_field(copy->to_ptr, copy->to_null_ptr, 1);
4315 copy++;
4316 }
4317 }
4318 }
4319 else if ((real_pos->type() == Item::FUNC_ITEM ||
4320 real_pos->type() == Item::SUBSELECT_ITEM ||
4321 real_pos->type() == Item::CACHE_ITEM ||
4322 real_pos->type() == Item::COND_ITEM) &&
4323 !real_pos->with_sum_func)
4324 { // Save for send fields
4325 pos= real_pos;
4326 /* TODO:
4327 In most cases this result will be sent to the user.
4328 This should be changed to use copy_int or copy_real depending
4329 on how the value is to be used: In some cases this may be an
4330 argument in a group function, like: IF(ISNULL(col),0,COUNT(*))
4331 */
4332 if (!(pos= Item_copy::create(pos)))
4333 goto err;
4334 if (i < border) // HAVING, ORDER and GROUP BY
4335 {
4336 if (extra_funcs.push_back(pos))
4337 goto err;
4338 }
4339 else if (param->copy_funcs.push_back(pos))
4340 goto err;
4341 }
4342 res_all_fields.push_back(pos);
4343 ref_pointer_array[((i < border)? all_fields.elements-i-1 : i-border)]=
4344 pos;
4345 }
4346 param->copy_field_end= copy;
4347
4348 for (i= 0; i < border; i++)
4349 itr++;
4350 itr.sublist(res_selected_fields, elements);
4351 /*
4352 Put elements from HAVING, ORDER BY and GROUP BY last to ensure that any
4353 reference used in these will resolve to a item that is already calculated
4354 */
4355 param->copy_funcs.concat(&extra_funcs);
4356
4357 DBUG_RETURN(0);
4358
4359 err:
4360 if (copy)
4361 delete [] param->copy_field; // This is never 0
4362 param->copy_field=0;
4363 err2:
4364 DBUG_RETURN(TRUE);
4365 }
4366
4367
4368 /**
4369 Make a copy of all simple SELECT'ed items.
4370
4371 This is done at the start of a new group so that we can retrieve
4372 these later when the group changes.
4373 @returns false if OK, true on error.
4374 */
4375
4376 bool
copy_fields(Temp_table_param * param,const THD * thd)4377 copy_fields(Temp_table_param *param, const THD *thd)
4378 {
4379 Copy_field *ptr=param->copy_field;
4380 Copy_field *end=param->copy_field_end;
4381
4382 assert((ptr != NULL && end >= ptr) || (ptr == NULL && end == NULL));
4383
4384 for (; ptr < end; ptr++)
4385 ptr->invoke_do_copy(ptr);
4386
4387 List_iterator_fast<Item> it(param->copy_funcs);
4388 Item_copy *item;
4389 bool is_error= thd->is_error();
4390 while (!is_error && (item= (Item_copy*) it++))
4391 is_error= item->copy(thd);
4392
4393 return is_error;
4394 }
4395
4396
4397 /**
4398 Change all funcs and sum_funcs to fields in tmp table, and create
4399 new list of all items.
4400
4401 @param thd THD pointer
4402 @param ref_pointer_array array of pointers to top elements of filed list
4403 @param res_selected_fields new list of items of select item list
4404 @param res_all_fields new list of all items
4405 @param elements number of elements in select item list
4406 @param all_fields all fields list
4407
4408 @retval
4409 0 ok
4410 @retval
4411 !=0 error
4412 */
4413
4414 bool
change_to_use_tmp_fields(THD * thd,Ref_ptr_array ref_pointer_array,List<Item> & res_selected_fields,List<Item> & res_all_fields,uint elements,List<Item> & all_fields)4415 change_to_use_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array,
4416 List<Item> &res_selected_fields,
4417 List<Item> &res_all_fields,
4418 uint elements, List<Item> &all_fields)
4419 {
4420 List_iterator_fast<Item> it(all_fields);
4421 Item *item_field,*item;
4422 DBUG_ENTER("change_to_use_tmp_fields");
4423
4424 res_selected_fields.empty();
4425 res_all_fields.empty();
4426
4427 uint border= all_fields.elements - elements;
4428 for (uint i= 0; (item= it++); i++)
4429 {
4430 Field *field;
4431 if (item->with_sum_func && item->type() != Item::SUM_FUNC_ITEM)
4432 item_field= item;
4433 else if (item->type() == Item::FIELD_ITEM)
4434 item_field= item->get_tmp_table_item(thd);
4435 else if (item->type() == Item::FUNC_ITEM &&
4436 ((Item_func*)item)->functype() == Item_func::SUSERVAR_FUNC)
4437 {
4438 field= item->get_tmp_table_field();
4439 if (field != NULL)
4440 {
4441 /*
4442 Replace "@:=<expression>" with "@:=<tmp table column>". Otherwise, we
4443 would re-evaluate <expression>, and if expression were a subquery, this
4444 would access already-unlocked tables.
4445 */
4446 Item_func_set_user_var* suv=
4447 new Item_func_set_user_var(thd, (Item_func_set_user_var*) item);
4448 Item_field *new_field= new Item_field(field);
4449 if (!suv || !new_field)
4450 DBUG_RETURN(true); // Fatal error
4451 List<Item> list;
4452 list.push_back(new_field);
4453 suv->set_arguments(list, true);
4454 item_field= suv;
4455 }
4456 else
4457 item_field= item;
4458 }
4459 else if ((field= item->get_tmp_table_field()))
4460 {
4461 if (item->type() == Item::SUM_FUNC_ITEM && field->table->group)
4462 item_field= ((Item_sum*) item)->result_item(field);
4463 else
4464 item_field= (Item*) new Item_field(field);
4465 if (!item_field)
4466 DBUG_RETURN(true); // Fatal error
4467
4468 if (item->real_item()->type() != Item::FIELD_ITEM)
4469 field->orig_table= 0;
4470 item_field->item_name= item->item_name;
4471 if (item->type() == Item::REF_ITEM)
4472 {
4473 Item_field *ifield= (Item_field *) item_field;
4474 Item_ref *iref= (Item_ref *) item;
4475 ifield->table_name= iref->table_name;
4476 ifield->db_name= iref->db_name;
4477 }
4478 #ifndef NDEBUG
4479 if (!item_field->item_name.is_set())
4480 {
4481 char buff[256];
4482 String str(buff,sizeof(buff),&my_charset_bin);
4483 str.length(0);
4484 item->print(&str, QT_ORDINARY);
4485 item_field->item_name.copy(str.ptr(), str.length());
4486 }
4487 #endif
4488 }
4489 else
4490 item_field= item;
4491
4492 res_all_fields.push_back(item_field);
4493 ref_pointer_array[((i < border)? all_fields.elements-i-1 : i-border)]=
4494 item_field;
4495 }
4496
4497 List_iterator_fast<Item> itr(res_all_fields);
4498 for (uint i= 0; i < border; i++)
4499 itr++;
4500 itr.sublist(res_selected_fields, elements);
4501 DBUG_RETURN(false);
4502 }
4503
4504
4505 /**
4506 Change all sum_func refs to fields to point at fields in tmp table.
4507 Change all funcs to be fields in tmp table.
4508
4509 @param thd THD pointer
4510 @param ref_pointer_array array of pointers to top elements of filed list
4511 @param res_selected_fields new list of items of select item list
4512 @param res_all_fields new list of all items
4513 @param elements number of elements in select item list
4514 @param all_fields all fields list
4515
4516 @retval
4517 0 ok
4518 @retval
4519 1 error
4520 */
4521
4522 bool
change_refs_to_tmp_fields(THD * thd,Ref_ptr_array ref_pointer_array,List<Item> & res_selected_fields,List<Item> & res_all_fields,uint elements,List<Item> & all_fields)4523 change_refs_to_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array,
4524 List<Item> &res_selected_fields,
4525 List<Item> &res_all_fields, uint elements,
4526 List<Item> &all_fields)
4527 {
4528 List_iterator_fast<Item> it(all_fields);
4529 Item *item, *new_item;
4530 res_selected_fields.empty();
4531 res_all_fields.empty();
4532
4533 uint i, border= all_fields.elements - elements;
4534 for (i= 0; (item= it++); i++)
4535 {
4536 res_all_fields.push_back(new_item= item->get_tmp_table_item(thd));
4537 ref_pointer_array[((i < border)? all_fields.elements-i-1 : i-border)]=
4538 new_item;
4539 }
4540
4541 List_iterator_fast<Item> itr(res_all_fields);
4542 for (i= 0; i < border; i++)
4543 itr++;
4544 itr.sublist(res_selected_fields, elements);
4545
4546 return thd->is_fatal_error;
4547 }
4548
4549
4550 /**
4551 Save NULL-row info for constant tables. Used in conjunction with
4552 restore_const_null_info() to restore constant table null_row and
4553 status values after temporarily marking rows as NULL. This is only
4554 done for const tables in subqueries because these values are not
4555 recalculated on next execution of the subquery.
4556
4557 @param join The join for which const tables are about to be
4558 marked as containing only NULL values
4559 @param[out] save_nullinfo Const tables that have null_row=false and
4560 STATUS_NULL_ROW set are tagged in this
4561 table_map so that the value can be
4562 restored by restore_const_null_info()
4563
4564 @see TABLE::set_null_row
4565 @see restore_const_null_info
4566 */
save_const_null_info(JOIN * join,table_map * save_nullinfo)4567 static void save_const_null_info(JOIN *join, table_map *save_nullinfo)
4568 {
4569 assert(join->const_tables);
4570
4571 for (uint tableno= 0; tableno < join->const_tables; tableno++)
4572 {
4573 QEP_TAB *const tab= join->qep_tab + tableno;
4574 TABLE *const table= tab->table();
4575 /*
4576 table->status and table->null_row must be in sync: either both set
4577 or none set. Otherwise, an additional table_map parameter is
4578 needed to save/restore_const_null_info() these separately
4579 */
4580 assert(table->has_null_row() ? (table->status & STATUS_NULL_ROW) :
4581 !(table->status & STATUS_NULL_ROW));
4582
4583 if (!table->has_null_row())
4584 *save_nullinfo|= tab->table_ref->map();
4585 }
4586 }
4587
4588 /**
4589 Restore NULL-row info for constant tables. Used in conjunction with
4590 save_const_null_info() to restore constant table null_row and status
4591 values after temporarily marking rows as NULL. This is only done for
4592 const tables in subqueries because these values are not recalculated
4593 on next execution of the subquery.
4594
4595 @param join The join for which const tables have been
4596 marked as containing only NULL values
4597 @param save_nullinfo Const tables that had null_row=false and
4598 STATUS_NULL_ROW set when
4599 save_const_null_info() was called
4600
4601 @see TABLE::set_null_row
4602 @see save_const_null_info
4603 */
restore_const_null_info(JOIN * join,table_map save_nullinfo)4604 static void restore_const_null_info(JOIN *join, table_map save_nullinfo)
4605 {
4606 assert(join->const_tables && save_nullinfo);
4607
4608 for (uint tableno= 0; tableno < join->const_tables; tableno++)
4609 {
4610 QEP_TAB *const tab= join->qep_tab + tableno;
4611 if ((save_nullinfo & tab->table_ref->map()))
4612 {
4613 /*
4614 The table had null_row=false and STATUS_NULL_ROW set when
4615 save_const_null_info was called
4616 */
4617 tab->table()->reset_null_row();
4618 }
4619 }
4620 }
4621
4622
4623 /****************************************************************************
4624 QEP_tmp_table implementation
4625 ****************************************************************************/
4626
4627 /**
4628 @brief Instantiate tmp table and start index scan if necessary
4629 @todo Tmp table always would be created, even for empty result. Extend
4630 executor to avoid tmp table creation when no rows were written
4631 into tmp table.
4632 @return
4633 true error
4634 false ok
4635 */
4636
4637 bool
prepare_tmp_table()4638 QEP_tmp_table::prepare_tmp_table()
4639 {
4640 TABLE *table= qep_tab->table();
4641 JOIN *join= qep_tab->join();
4642 int rc= 0;
4643
4644 Temp_table_param *const tmp_tbl= qep_tab->tmp_table_param;
4645 if (!table->is_created())
4646 {
4647 if (instantiate_tmp_table(table, tmp_tbl->keyinfo,
4648 tmp_tbl->start_recinfo,
4649 &tmp_tbl->recinfo,
4650 join->select_lex->active_options(),
4651 join->thd->variables.big_tables,
4652 &join->thd->opt_trace))
4653 return true;
4654 (void) table->file->extra(HA_EXTRA_WRITE_CACHE);
4655 empty_record(table);
4656 }
4657 /* If it wasn't already, start index scan for grouping using table index. */
4658 if (!table->file->inited &&
4659 ((table->group &&
4660 tmp_tbl->sum_func_count && table->s->keys) ||
4661 table->hash_field))
4662 rc= table->file->ha_index_init(0, 0);
4663 else
4664 {
4665 /* Start index scan in scanning mode */
4666 rc= table->file->ha_rnd_init(true);
4667 }
4668 if (rc)
4669 {
4670 table->file->print_error(rc, MYF(0));
4671 return true;
4672 }
4673 return false;
4674 }
4675
4676
4677 /**
4678 @brief Prepare table if necessary and call write_func to save record
4679
4680 @param end_of_record the end_of_record signal to pass to the writer
4681
4682 @return return one of enum_nested_loop_state.
4683 */
4684
4685 enum_nested_loop_state
put_record(bool end_of_records)4686 QEP_tmp_table::put_record(bool end_of_records)
4687 {
4688 // Lasy tmp table creation/initialization
4689 if (!qep_tab->table()->file->inited && prepare_tmp_table())
4690 return NESTED_LOOP_ERROR;
4691 enum_nested_loop_state rc= (*write_func)(qep_tab->join(), qep_tab,
4692 end_of_records);
4693 return rc;
4694 }
4695
4696
4697 /**
4698 @brief Finish rnd/index scan after accumulating records, switch ref_array,
4699 and send accumulated records further.
4700 @return return one of enum_nested_loop_state.
4701 */
4702
4703 enum_nested_loop_state
end_send()4704 QEP_tmp_table::end_send()
4705 {
4706 enum_nested_loop_state rc= NESTED_LOOP_OK;
4707 TABLE *table= qep_tab->table();
4708 JOIN *join= qep_tab->join();
4709
4710 // All records were stored, send them further
4711 int tmp, new_errno= 0;
4712
4713 if ((rc= put_record(true)) < NESTED_LOOP_OK)
4714 return rc;
4715
4716 if ((tmp= table->file->extra(HA_EXTRA_NO_CACHE)))
4717 {
4718 DBUG_PRINT("error",("extra(HA_EXTRA_NO_CACHE) failed"));
4719 new_errno= tmp;
4720 }
4721 if ((tmp= table->file->ha_index_or_rnd_end()))
4722 {
4723 DBUG_PRINT("error",("ha_index_or_rnd_end() failed"));
4724 new_errno= tmp;
4725 }
4726 if (new_errno)
4727 {
4728 table->file->print_error(new_errno,MYF(0));
4729 return NESTED_LOOP_ERROR;
4730 }
4731 // Update ref array
4732 join->set_items_ref_array(*qep_tab->ref_array);
4733 table->reginfo.lock_type= TL_UNLOCK;
4734
4735 bool in_first_read= true;
4736 while (rc == NESTED_LOOP_OK)
4737 {
4738 int error;
4739 if (in_first_read)
4740 {
4741 in_first_read= false;
4742 error= join_init_read_record(qep_tab);
4743 }
4744 else
4745 error= qep_tab->read_record.read_record(&qep_tab->read_record);
4746
4747 if (error > 0 || (join->thd->is_error())) // Fatal error
4748 rc= NESTED_LOOP_ERROR;
4749 else if (error < 0)
4750 break;
4751 else if (join->thd->killed) // Aborted by user
4752 {
4753 join->thd->send_kill_message();
4754 rc= NESTED_LOOP_KILLED;
4755 }
4756 else
4757 rc= evaluate_join_record(join, qep_tab);
4758 }
4759
4760 // Finish rnd scn after sending records
4761 if (table->file->inited)
4762 table->file->ha_rnd_end();
4763
4764 return rc;
4765 }
4766
4767
4768 /******************************************************************************
4769 Code for pfs_batch_update
4770 ******************************************************************************/
4771
4772
pfs_batch_update(JOIN * join)4773 bool QEP_TAB::pfs_batch_update(JOIN *join)
4774 {
4775 /*
4776 Use PFS batch mode unless
4777 1. tab is not an inner-most table, or
4778 2. a table has eq_ref or const access type, or
4779 3. this tab contains a subquery that accesses one or more tables
4780 */
4781
4782 return !((join->qep_tab + join->primary_tables - 1) != this || // 1
4783 this->type() == JT_EQ_REF || // 2
4784 this->type() == JT_CONST ||
4785 this->type() == JT_SYSTEM ||
4786 (condition() && condition()->has_subquery())); // 3
4787 }
4788
4789 /**
4790 @} (end of group Query_Executor)
4791 */
4792
4793