/* Copyright (c) 2000, 2021, Oracle and/or its affiliates. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License, version 2.0, as published by the Free Software Foundation. This program is also distributed with certain software (including but not limited to OpenSSL) that is licensed under separate terms, as designated in a particular file or component or in included license documentation. The authors of MySQL hereby grant you an additional permission to link the program and your derivative works with the separately licensed software that they have included with MySQL. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0, for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /** @file @brief Query execution @defgroup Query_Executor Query Executor @{ */ #include "sql_executor.h" #include "debug_sync.h" // DEBUG_SYNC #include "item_sum.h" // Item_sum #include "key.h" // key_cmp #include "log.h" // sql_print_error #include "opt_trace.h" // Opt_trace_object #include "sql_base.h" // fill_record #include "sql_join_buffer.h" // st_cache_field #include "sql_optimizer.h" // JOIN #include "sql_show.h" // get_schema_tables_result #include "sql_tmp_table.h" // create_tmp_table #include "json_dom.h" // Json_wrapper #include <algorithm> using std::max; using std::min; static void return_zero_rows(JOIN *join, List<Item> &fields); static void save_const_null_info(JOIN *join, table_map *save_nullinfo); static void restore_const_null_info(JOIN *join, table_map save_nullinfo); static int do_select(JOIN *join); static enum_nested_loop_state evaluate_join_record(JOIN *join, QEP_TAB *qep_tab); static enum_nested_loop_state evaluate_null_complemented_join_record(JOIN *join, QEP_TAB *qep_tab); static enum_nested_loop_state end_send(JOIN *join, QEP_TAB *qep_tab, bool end_of_records); static enum_nested_loop_state end_write(JOIN *join, QEP_TAB *qep_tab, bool end_of_records); static enum_nested_loop_state end_update(JOIN *join, QEP_TAB *qep_tab, bool end_of_records); static void copy_sum_funcs(Item_sum **func_ptr, Item_sum **end_ptr); static int read_system(TABLE *table); static int join_read_const(QEP_TAB *tab); static int read_const(TABLE *table, TABLE_REF *ref); static int join_read_key(QEP_TAB *tab); static int join_read_always_key(QEP_TAB *tab); static int join_no_more_records(READ_RECORD *info); static int join_read_next(READ_RECORD *info); static int join_read_next_same(READ_RECORD *info); static int join_read_prev(READ_RECORD *info); static int join_ft_read_first(QEP_TAB *tab); static int join_ft_read_next(READ_RECORD *info); static int join_read_always_key_or_null(QEP_TAB *tab); static int join_read_next_same_or_null(READ_RECORD *info); static int create_sort_index(THD *thd, JOIN *join, QEP_TAB *tab); static bool remove_dup_with_compare(THD *thd, TABLE *entry, Field **field, ulong offset, Item *having); static bool remove_dup_with_hash_index(THD *thd, TABLE *table, uint field_count, Field **first_field, ulong key_length, Item *having); static int join_read_linked_first(QEP_TAB *tab); static int join_read_linked_next(READ_RECORD *info); static int do_sj_reset(SJ_TMP_TABLE *sj_tbl); static bool cmp_buffer_with_ref(THD *thd, TABLE
*table, TABLE_REF *tab_ref); /** Execute select, executor entry point. @todo When can we have here thd->net.report_error not zero? @note that EXPLAIN may come here (single-row derived table, uncorrelated scalar subquery in WHERE clause...). */ void JOIN::exec() { Opt_trace_context * const trace= &thd->opt_trace; Opt_trace_object trace_wrapper(trace); Opt_trace_object trace_exec(trace, "join_execution"); trace_exec.add_select_number(select_lex->select_number); Opt_trace_array trace_steps(trace, "steps"); List<Item> *columns_list= &fields_list; DBUG_ENTER("JOIN::exec"); assert(select_lex == thd->lex->current_select()); /* Check that we either - have no tables, or - have tables and have locked them, or - called for fake_select_lex, which may have temporary tables which do not need locking up front. */ assert(!tables || thd->lex->is_query_tables_locked() || select_lex == unit->fake_select_lex); THD_STAGE_INFO(thd, stage_executing); DEBUG_SYNC(thd, "before_join_exec"); set_executed(); if (prepare_result()) DBUG_VOID_RETURN; Query_result *const query_result= select_lex->query_result(); do_send_rows = unit->select_limit_cnt > 0; if (!tables_list && (tables || !select_lex->with_sum_func)) { // Only test of functions /* We have to test for 'conds' here as the WHERE may not be constant even if we don't have any tables for prepared statements or if conds uses something like 'rand()'. Don't evaluate the having clause here. return_zero_rows() should be called only for cases where there are no matching rows after evaluating all conditions except the HAVING clause. */ if (select_lex->cond_value != Item::COND_FALSE && (!where_cond || where_cond->val_int())) { if (query_result->send_result_set_metadata(*columns_list, Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) DBUG_VOID_RETURN; /* If the HAVING clause is either impossible or always true, then JOIN::having is set to NULL by optimize_cond. In this case JOIN::exec must check for JOIN::having_value, in the same way it checks for JOIN::cond_value. */ if (((select_lex->having_value != Item::COND_FALSE) && (!having_cond || having_cond->val_int())) && do_send_rows && query_result->send_data(fields_list)) error= 1; else { error= (int) query_result->send_eof(); send_records= calc_found_rows ? 1 : thd->get_sent_row_count(); } /* Query block (without union) always returns 0 or 1 row */ thd->current_found_rows= send_records; } else { return_zero_rows(this, *columns_list); } DBUG_VOID_RETURN; } if (zero_result_cause) { return_zero_rows(this, *columns_list); DBUG_VOID_RETURN; } /* Initialize examined rows here because the values from all join parts must be accumulated in examined_row_count. Hence every join iteration must count from zero. */ examined_rows= 0; /* XXX: When can we have here thd->is_error() not zero? */ if (thd->is_error()) { error= thd->is_error(); DBUG_VOID_RETURN; } THD_STAGE_INFO(thd, stage_sending_data); DBUG_PRINT("info", ("%s", thd->proc_info)); query_result->send_result_set_metadata(*fields, Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF); error= do_select(this); /* Accumulate the counts from all join iterations of all join parts.
*/ thd->inc_examined_row_count(examined_rows); DBUG_PRINT("counts", ("thd->examined_row_count: %lu", (ulong) thd->get_examined_row_count())); DBUG_VOID_RETURN; } bool JOIN::create_intermediate_table(QEP_TAB *const tab, List<Item> *tmp_table_fields, ORDER_with_src &tmp_table_group, bool save_sum_fields) { DBUG_ENTER("JOIN::create_intermediate_table"); THD_STAGE_INFO(thd, stage_creating_tmp_table); /* Pushing LIMIT to the temporary table creation is not applicable when there is ORDER BY or GROUP BY or there is no GROUP BY, but there are aggregate functions, because in all these cases we need all result rows. */ ha_rows tmp_rows_limit= ((order == NULL || skip_sort_order) && !tmp_table_group && !select_lex->with_sum_func) ? m_select_limit : HA_POS_ERROR; tab->tmp_table_param= new (thd->mem_root) Temp_table_param(tmp_table_param); tab->tmp_table_param->skip_create_table= true; TABLE* table= create_tmp_table(thd, tab->tmp_table_param, *tmp_table_fields, tmp_table_group, select_distinct && !group_list, save_sum_fields, select_lex->active_options(), tmp_rows_limit, ""); if (!table) DBUG_RETURN(true); tmp_table_param.using_outer_summary_function= tab->tmp_table_param->using_outer_summary_function; assert(tab->idx() > 0); tab[-1].next_select= sub_select_op; if (!(tab->op= new (thd->mem_root) QEP_tmp_table(tab))) goto err; tab->set_table(table); if (table->group) { explain_flags.set(tmp_table_group.src, ESP_USING_TMPTABLE); } if (table->distinct || select_distinct) { explain_flags.set(ESC_DISTINCT, ESP_USING_TMPTABLE); } if ((!group_list && !order && !select_distinct) || (select_lex->active_options() & (SELECT_BIG_RESULT | OPTION_BUFFER_RESULT))) { explain_flags.set(ESC_BUFFER_RESULT, ESP_USING_TMPTABLE); } /* if group or order on first table, sort first */ if (group_list && simple_group) { DBUG_PRINT("info",("Sorting for group")); THD_STAGE_INFO(thd, stage_sorting_for_group); if (ordered_index_usage != ordered_index_group_by && qep_tab[const_tables].type() != JT_CONST && // Don't sort 1 row add_sorting_to_table(const_tables, &group_list)) goto err; if (alloc_group_fields(this, group_list)) goto err; if (make_sum_func_list(all_fields, fields_list, true)) goto err; const bool need_distinct= !(tab->quick() && tab->quick()->is_agg_loose_index_scan()); if (prepare_sum_aggregators(sum_funcs, need_distinct)) goto err; if (setup_sum_funcs(thd, sum_funcs)) goto err; group_list= NULL; } else { if (make_sum_func_list(all_fields, fields_list, false)) goto err; const bool need_distinct= !(tab->quick() && tab->quick()->is_agg_loose_index_scan()); if (prepare_sum_aggregators(sum_funcs, need_distinct)) goto err; if (setup_sum_funcs(thd, sum_funcs)) goto err; if (!group_list && !table->distinct && order && simple_order) { DBUG_PRINT("info",("Sorting for order")); THD_STAGE_INFO(thd, stage_sorting_for_order); if (ordered_index_usage != ordered_index_order_by && add_sorting_to_table(const_tables, &order)) goto err; order= NULL; } } DBUG_RETURN(false); err: if (table != NULL) { free_tmp_table(thd, table); tab->set_table(NULL); } DBUG_RETURN(true); } /** Send all rollup levels higher than the current one to the client.
@b SAMPLE @code SELECT a, b, SUM(c) FROM t1 GROUP BY a,b WITH ROLLUP @endcode @param idx Level we are on: - 0 = Total sum level - 1 = First group changed (a) - 2 = Second group changed (a,b) @retval 0 ok @retval 1 If send_data_failed() */ int JOIN::rollup_send_data(uint idx) { uint i; for (i= send_group_parts ; i-- > idx ; ) { /* Get reference pointers to sum functions in place */ copy_ref_ptr_array(ref_ptrs, rollup.ref_pointer_arrays[i]); if ((!having_cond || having_cond->val_int())) { if (send_records < unit->select_limit_cnt && do_send_rows && select_lex->query_result()->send_data(rollup.fields[i])) return 1; send_records++; } } /* Restore ref_pointer_array */ set_items_ref_array(current_ref_ptrs); return 0; } /** Write all rollup levels higher than the current one to a temp table. @b SAMPLE @code SELECT a, b, SUM(c) FROM t1 GROUP BY a,b WITH ROLLUP @endcode @param idx Level we are on: - 0 = Total sum level - 1 = First group changed (a) - 2 = Second group changed (a,b) @param table reference to temp table @retval 0 ok @retval 1 if write_data_failed() */ int JOIN::rollup_write_data(uint idx, TABLE *table_arg) { uint i; for (i= send_group_parts ; i-- > idx ; ) { /* Get reference pointers to sum functions in place */ copy_ref_ptr_array(ref_ptrs, rollup.ref_pointer_arrays[i]); if ((!having_cond || having_cond->val_int())) { int write_error; Item *item; List_iterator_fast<Item> it(rollup.fields[i]); while ((item= it++)) { if (item->type() == Item::NULL_ITEM && item->is_result_field()) item->save_in_result_field(1); } copy_sum_funcs(sum_funcs_end[i+1], sum_funcs_end[i]); if ((write_error= table_arg->file->ha_write_row(table_arg->record[0]))) { if (create_ondisk_from_heap(thd, table_arg, tmp_table_param.start_recinfo, &tmp_table_param.recinfo, write_error, FALSE, NULL)) return 1; } } } /* Restore ref_pointer_array */ set_items_ref_array(current_ref_ptrs); return 0; } void JOIN::optimize_distinct() { for (int i= primary_tables - 1; i >= 0; --i) { QEP_TAB *last_tab= qep_tab + i; if (select_lex->select_list_tables & last_tab->table_ref->map()) break; last_tab->not_used_in_distinct= true; } /* Optimize "select distinct b from t1 order by key_part_1 limit #" */ if (order && skip_sort_order) { /* Should already have been optimized away */ assert(ordered_index_usage == ordered_index_order_by); if (ordered_index_usage == ordered_index_order_by) { order= NULL; } } } bool prepare_sum_aggregators(Item_sum **func_ptr, bool need_distinct) { Item_sum *func; DBUG_ENTER("prepare_sum_aggregators"); while ((func= *(func_ptr++))) { if (func->set_aggregator(need_distinct && func->has_with_distinct() ? Aggregator::DISTINCT_AGGREGATOR : Aggregator::SIMPLE_AGGREGATOR)) DBUG_RETURN(TRUE); } DBUG_RETURN(FALSE); } /****************************************************************************** Code for calculating functions ******************************************************************************/ /** Call ::setup for all sum functions. @param thd thread handler @param func_ptr sum function list @retval FALSE ok @retval TRUE error */ bool setup_sum_funcs(THD *thd, Item_sum **func_ptr) { Item_sum *func; DBUG_ENTER("setup_sum_funcs"); while ((func= *(func_ptr++))) { if (func->aggregator_setup(thd)) DBUG_RETURN(TRUE); } DBUG_RETURN(FALSE); } static void init_tmptable_sum_functions(Item_sum **func_ptr) { Item_sum *func; while ((func= *(func_ptr++))) func->reset_field(); } /** Update record 0 in tmp_table from record 1.
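Used by end_update(): when a row arrives for a group that is already stored in the GROUP BY temporary table, each aggregate's update_field() folds the value contributed by the current input row into the aggregate column of the stored group row.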
*/ static void update_tmptable_sum_func(Item_sum **func_ptr, TABLE *tmp_table MY_ATTRIBUTE((unused))) { Item_sum *func; while ((func= *(func_ptr++))) func->update_field(); } /** Copy result of sum functions to record in tmp_table. */ static void copy_sum_funcs(Item_sum **func_ptr, Item_sum **end_ptr) { for (; func_ptr != end_ptr ; func_ptr++) (*func_ptr)->save_in_result_field(1); return; } static bool init_sum_functions(Item_sum **func_ptr, Item_sum **end_ptr) { for (; func_ptr != end_ptr ;func_ptr++) { if ((*func_ptr)->reset_and_add()) return 1; } /* If rollup, calculate the upper sum levels */ for ( ; *func_ptr ; func_ptr++) { if ((*func_ptr)->aggregator_add()) return 1; } return 0; } static bool update_sum_func(Item_sum **func_ptr) { Item_sum *func; for (; (func= *func_ptr) ; func_ptr++) if (func->aggregator_add()) return 1; return 0; } /** Copy result of functions to record in tmp_table. Uses the thread pointer to check for errors in some of the val_xxx() methods called by the save_in_result_field() function. TODO: make the Item::val_xxx() return error code @param func_ptr array of the function Items to copy to the tmp table @param thd pointer to the current thread for error checking @retval FALSE if OK @retval TRUE on error */ bool copy_funcs(Func_ptr_array *func_ptr, const THD *thd) { for (size_t ix= 0; ix < func_ptr->size(); ++ix) { Item *func= func_ptr->at(ix); func->save_in_result_field(1); /* Need to check the THD error state because Item::val_xxx() don't return error code, but can generate errors TODO: change it for a real status check when Item::val_xxx() are extended to return status code. */ if (thd->is_error()) return TRUE; } return FALSE; } /* end_select-compatible function that writes the record into a sjm temptable SYNOPSIS end_sj_materialize() join The join join_tab Last join table end_of_records FALSE <=> This call is made to pass another record combination TRUE <=> EOF (no action) DESCRIPTION This function is used by semi-join materialization to capture suquery's resultset and write it into the temptable (that is, materialize it). NOTE This function is used only for semi-join materialization. Non-semijoin materialization uses different mechanism. 
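EXAMPLE For example, a query of the general shape SELECT * FROM ot WHERE ot.a IN (SELECT it.b FROM it) may be executed with semi-join materialization; each row combination produced for the inner table(s) then reaches this function, which stores the inner expression values (here it.b) into the materialized temporary table, relying on the table's unique constraint to reject duplicates.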
RETURN NESTED_LOOP_OK NESTED_LOOP_ERROR */ static enum_nested_loop_state end_sj_materialize(JOIN *join, QEP_TAB *qep_tab, bool end_of_records) { int error; THD *thd= join->thd; Semijoin_mat_exec *sjm= qep_tab[-1].sj_mat_exec(); DBUG_ENTER("end_sj_materialize"); if (!end_of_records) { TABLE *table= sjm->table; List_iterator<Item> it(sjm->sj_nest->nested_join->sj_inner_exprs); Item *item; while ((item= it++)) { if (item->is_null()) DBUG_RETURN(NESTED_LOOP_OK); } fill_record(thd, table, table->visible_field_ptr(), sjm->sj_nest->nested_join->sj_inner_exprs, NULL, NULL); if (thd->is_error()) DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ if (!check_unique_constraint(table)) DBUG_RETURN(NESTED_LOOP_OK); if ((error= table->file->ha_write_row(table->record[0]))) { /* create_ondisk_from_heap will generate error if needed */ if (!table->file->is_ignorable_error(error)) { if (create_ondisk_from_heap(thd, table, sjm->table_param.start_recinfo, &sjm->table_param.recinfo, error, TRUE, NULL)) DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ /* Initialize the index, since create_ondisk_from_heap does not replicate the earlier index initialization */ if (table->hash_field) table->file->ha_index_init(0, false); } } } DBUG_RETURN(NESTED_LOOP_OK); } /** Check appearance of new constant items in multiple equalities of a condition after reading a constant table. The function retrieves the cond condition and for each encountered multiple equality checks whether new constants have appeared after reading the constant (single row) table tab. If so it adjusts the multiple equality appropriately. @param thd thread handler @param cond condition whose multiple equalities are to be checked @param tab constant table that has been read */ static bool update_const_equal_items(THD *thd, Item *cond, JOIN_TAB *tab) { if (!(cond->used_tables() & tab->table_ref->map())) return false; if (cond->type() == Item::COND_ITEM) { List<Item> *cond_list= ((Item_cond*) cond)->argument_list(); List_iterator_fast<Item> li(*cond_list); Item *item; while ((item= li++)) { if (update_const_equal_items(thd, item, tab)) return true; } } else if (cond->type() == Item::FUNC_ITEM && ((Item_cond*) cond)->functype() == Item_func::MULT_EQUAL_FUNC) { Item_equal *item_equal= (Item_equal *) cond; bool contained_const= item_equal->get_const() != NULL; if (item_equal->update_const(thd)) return true; if (!contained_const && item_equal->get_const()) { /* Update keys for range analysis */ Item_equal_iterator it(*item_equal); Item_field *item_field; while ((item_field= it++)) { Field *field= item_field->field; JOIN_TAB *stat= field->table->reginfo.join_tab; key_map possible_keys= field->key_start; possible_keys.intersect(field->table->keys_in_use_for_query); stat[0].const_keys.merge(possible_keys); stat[0].keys().merge(possible_keys); /* For each field in the multiple equality (for which we know that it is a constant) we have to find its corresponding key part, and set that key part in const_key_parts. */ if (!possible_keys.is_clear_all()) { TABLE *const table= field->table; for (Key_use *use= stat->keyuse(); use && use->table_ref == item_field->table_ref; use++) { if (possible_keys.is_set(use->key) && table->key_info[use->key].key_part[use->keypart].field == field) table->const_key_parts[use->key]|= use->keypart_map; } } } } } return false; } /** For some reason, e.g. due to an impossible WHERE clause, the tables cannot possibly contain any rows that will be in the result.
This function is used to return with a result based on no matching rows (i.e., an empty result or one row with aggregates calculated without using rows in the case of implicit grouping) before the execution of nested loop join. This function may evaluate the HAVING clause and is only meant for result sets that are empty due to an impossible HAVING clause. Do not use it if HAVING has already been evaluated. @param join The join that does not produce a row @param fields Fields in result */ static void return_zero_rows(JOIN *join, List<Item> &fields) { DBUG_ENTER("return_zero_rows"); join->join_free(); /* Update results for FOUND_ROWS */ if (!join->send_row_on_empty_set()) { join->thd->current_found_rows= 0; } SELECT_LEX *const select= join->select_lex; if (!(select->query_result()->send_result_set_metadata(fields, Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))) { bool send_error= FALSE; if (join->send_row_on_empty_set()) { // Mark tables as containing only NULL values for (TABLE_LIST *table= select->leaf_tables; table; table= table->next_leaf) table->table->set_null_row(); // Calculate aggregate functions for no rows /* Must notify all fields that there are no rows (not only those that will be returned) because join->having may refer to fields that are not part of the result columns. */ List_iterator_fast<Item> it(join->all_fields); Item *item; while ((item= it++)) item->no_rows_in_result(); if (!join->having_cond || join->having_cond->val_int()) send_error= select->query_result()->send_data(fields); } if (!send_error) select->query_result()->send_eof(); // Should be safe } DBUG_VOID_RETURN; } /** @brief Setup write_func of QEP_tmp_table object @param join_tab JOIN_TAB of a tmp table @details Function sets up write_func according to how QEP_tmp_table object that is attached to the given join_tab will be used in the query. */ void setup_tmptable_write_func(QEP_TAB *tab) { JOIN *join= tab->join(); TABLE *table= tab->table(); QEP_tmp_table *op= (QEP_tmp_table *)tab->op; Temp_table_param *const tmp_tbl= tab->tmp_table_param; assert(table && op); if (table->group && tmp_tbl->sum_func_count && !tmp_tbl->precomputed_group_by) { /* Note for MyISAM tmp tables: if uniques is true keys won't be created. */ if (table->s->keys) { DBUG_PRINT("info",("Using end_update")); op->set_write_func(end_update); } } else if (join->sort_and_group && !tmp_tbl->precomputed_group_by) { DBUG_PRINT("info",("Using end_write_group")); op->set_write_func(end_write_group); } else { DBUG_PRINT("info",("Using end_write")); op->set_write_func(end_write); if (tmp_tbl->precomputed_group_by) { Item_sum **func_ptr= join->sum_funcs; Item_sum *func; while ((func= *(func_ptr++))) { tmp_tbl->items_to_copy->push_back(func); } } } } /** @details Rows produced by a join sweep may end up in a temporary table or be sent to a client. Setup the function of the nested loop join algorithm which handles final fully constructed and matched records. @return end_select function to use. This function can't fail. */ Next_select_func JOIN::get_end_select_func() { /* Choose method for presenting result to user. Use end_send_group if the query requires grouping (has a GROUP BY clause and/or one or more aggregate functions). Use end_send if the query should not be grouped. */ if (sort_and_group && !tmp_table_param.precomputed_group_by) { DBUG_PRINT("info",("Using end_send_group")); return end_send_group; } DBUG_PRINT("info",("Using end_send")); return end_send; } /** Make a join of all tables and write it on socket or to table.
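Two execution paths exist: for a plan consisting only of const tables and needing no temporary table, the (at most one) result row is produced directly in this function; otherwise the nested-loop join is driven by calling join->first_select (normally sub_select or sub_select_op) for the first non-const table.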
@retval 0 if ok @retval 1 if error is sent @retval -1 if error should be sent */ static int do_select(JOIN *join) { int rc= 0; enum_nested_loop_state error= NESTED_LOOP_OK; DBUG_ENTER("do_select"); join->send_records=0; if (join->plan_is_const() && !join->need_tmp) { Next_select_func end_select= join->get_end_select_func(); /* HAVING will be checked after processing aggregate functions, but WHERE should be checked here (we have already read the tables) @todo: consider calling end_select instead of duplicating code */ if (!join->where_cond || join->where_cond->val_int()) { // HAVING will be checked by end_select error= (*end_select)(join, 0, 0); if (error >= NESTED_LOOP_OK) error= (*end_select)(join, 0, 1); /* If we don't go through evaluate_join_record(), do the counting here. join->send_records is increased on success in end_send(), so we don't touch it here. */ join->examined_rows++; assert(join->examined_rows <= 1); } else if (join->send_row_on_empty_set()) { table_map save_nullinfo= 0; /* If this is a subquery, we need to save and later restore the const table NULL info before clearing the tables because the following executions of the subquery do not reevaluate constant fields. @see save_const_null_info and restore_const_null_info */ if (join->select_lex->master_unit()->item && join->const_tables) save_const_null_info(join, &save_nullinfo); // Calculate aggregate functions for no rows List_iterator_fast<Item> it(*join->fields); Item *item; while ((item= it++)) item->no_rows_in_result(); // Mark tables as containing only NULL values if (join->clear()) error= NESTED_LOOP_ERROR; else { if (!join->having_cond || join->having_cond->val_int()) rc= join->select_lex->query_result()->send_data(*join->fields); if (save_nullinfo) restore_const_null_info(join, save_nullinfo); } } /* An error can happen when evaluating the conds (the join condition and piece of where clause relevant to this join table). */ if (join->thd->is_error()) error= NESTED_LOOP_ERROR; } else { QEP_TAB *qep_tab= join->qep_tab + join->const_tables; assert(join->primary_tables); error= join->first_select(join,qep_tab,0); if (error >= NESTED_LOOP_OK) error= join->first_select(join,qep_tab,1); } join->thd->current_found_rows= join->send_records; /* For "order by with limit", we cannot rely on send_records, but need to use the rowcount read originally into the join_tab applying the filesort. There cannot be any post-filtering conditions, nor any following join_tabs in this case, so this rowcount properly represents the correct number of qualifying rows. */ if (join->qep_tab && join->order) { // Save # of found records prior to cleanup QEP_TAB *sort_tab; uint const_tables= join->const_tables; // Take record count from first non constant table or from last tmp table if (join->tmp_tables > 0) sort_tab= &join->qep_tab[join->primary_tables + join->tmp_tables - 1]; else { assert(!join->plan_is_const()); sort_tab= &join->qep_tab[const_tables]; } if (sort_tab->filesort && join->calc_found_rows && sort_tab->filesort->sortorder && sort_tab->filesort->limit != HA_POS_ERROR) { join->thd->current_found_rows= sort_tab->records(); } } { /* The following will unlock all cursors if the command wasn't an update command */ join->join_free(); // Unlock all cursors } if (error == NESTED_LOOP_OK) { /* Sic: this branch works even if rc != 0, e.g. when send_data above returns an error.
*/ if (join->select_lex->query_result()->send_eof()) rc= 1; // Don't send error DBUG_PRINT("info",("%ld records output", (long) join->send_records)); } else rc= -1; #ifndef NDEBUG if (rc) { DBUG_PRINT("error",("Error: do_select() failed")); } #endif rc= join->thd->is_error() ? -1 : rc; DBUG_RETURN(rc); } /** @brief Accumulate full or partial join result in operation and send operation's result further. @param join pointer to the structure providing all context info for the query @param join_tab the JOIN_TAB object to which the operation is attached @param end_records TRUE <=> all records were accumulated, send them further @details This function accumulates records, one by one, in QEP operation's buffer by calling op->put_record(). When there is no more records to save, in this case the end_of_records argument == true, function tells QEP operation to send records further by calling op->send_records(). When all records are sent this function passes 'end_of_records' signal further by calling sub_select() with end_of_records argument set to true. After that op->end_send() is called to tell QEP operation that it could end internal buffer scan. @note This function is not expected to be called when dynamic range scan is used to scan join_tab because join cache is disabled for such scan and range scans aren't used for tmp tables. @see setup_join_buffering For caches the function implements the algorithmic schema for both Blocked Nested Loop Join and Batched Key Access Join. The difference can be seen only at the level of of the implementation of the put_record and send_records virtual methods for the cache object associated with the join_tab. @return return one of enum_nested_loop_state. */ enum_nested_loop_state sub_select_op(JOIN *join, QEP_TAB *qep_tab, bool end_of_records) { DBUG_ENTER("sub_select_op"); if (join->thd->killed) { /* The user has aborted the execution of the query */ join->thd->send_kill_message(); DBUG_RETURN(NESTED_LOOP_KILLED); } enum_nested_loop_state rc; QEP_operation *op= qep_tab->op; /* This function cannot be called if qep_tab has no associated operation */ assert(op != NULL); if (end_of_records) { rc= op->end_send(); if (rc >= NESTED_LOOP_OK) rc= sub_select(join, qep_tab, end_of_records); DBUG_RETURN(rc); } if (qep_tab->prepare_scan()) DBUG_RETURN(NESTED_LOOP_ERROR); /* setup_join_buffering() disables join buffering if QS_DYNAMIC_RANGE is enabled. */ assert(!qep_tab->dynamic_range()); rc= op->put_record(); DBUG_RETURN(rc); } /** Retrieve records ends with a given beginning from the result of a join. SYNPOSIS sub_select() join pointer to the structure providing all context info for the query join_tab the first next table of the execution plan to be retrieved end_records true when we need to perform final steps of retrival DESCRIPTION For a given partial join record consisting of records from the tables preceding the table join_tab in the execution plan, the function retrieves all matching full records from the result set and send them to the result set stream. @note The function effectively implements the final (n-k) nested loops of nested loops join algorithm, where k is the ordinal number of the join_tab table and n is the total number of tables in the join query. It performs nested loops joins with all conjunctive predicates from the where condition pushed as low to the tables as possible. E.g. 
for the query @code SELECT * FROM t1,t2,t3 WHERE t1.a=t2.a AND t2.b=t3.b AND t1.a BETWEEN 5 AND 9 @endcode the predicate (t1.a BETWEEN 5 AND 9) will be pushed to table t1, given the selected plan prescribes to nest retrievals of the joined tables in the following order: t1,t2,t3. A pushed down predicate are attached to the table which it pushed to, at the field join_tab->cond. When executing a nested loop of level k the function runs through the rows of 'join_tab' and for each row checks the pushed condition attached to the table. If it is false the function moves to the next row of the table. If the condition is true the function recursively executes (n-k-1) remaining embedded nested loops. The situation becomes more complicated if outer joins are involved in the execution plan. In this case the pushed down predicates can be checked only at certain conditions. Suppose for the query @code SELECT * FROM t1 LEFT JOIN (t2,t3) ON t3.a=t1.a WHERE t1>2 AND (t2.b>5 OR t2.b IS NULL) @endcode the optimizer has chosen a plan with the table order t1,t2,t3. The predicate P1=t1>2 will be pushed down to the table t1, while the predicate P2=(t2.b>5 OR t2.b IS NULL) will be attached to the table t2. But the second predicate can not be unconditionally tested right after a row from t2 has been read. This can be done only after the first row with t3.a=t1.a has been encountered. Thus, the second predicate P2 is supplied with a guarded value that are stored in the field 'found' of the first inner table for the outer join (table t2). When the first row with t3.a=t1.a for the current row of table t1 appears, the value becomes true. For now on the predicate is evaluated immediately after the row of table t2 has been read. When the first row with t3.a=t1.a has been encountered all conditions attached to the inner tables t2,t3 must be evaluated. Only when all of them are true the row is sent to the output stream. If not, the function returns to the lowest nest level that has a false attached condition. The predicates from on expressions are also pushed down. If in the the above example the on expression were (t3.a=t1.a AND t2.a=t1.a), then t1.a=t2.a would be pushed down to table t2, and without any guard. If after the run through all rows of table t2, the first inner table for the outer join operation, it turns out that no matches are found for the current row of t1, then current row from table t1 is complemented by nulls for t2 and t3. Then the pushed down predicates are checked for the composed row almost in the same way as it had been done for the first row with a match. The only difference is the predicates from on expressions are not checked. @par @b IMPLEMENTATION @par The function forms output rows for a current partial join of k tables tables recursively. For each partial join record ending with a certain row from join_tab it calls sub_select that builds all possible matching tails from the result set. To be able check predicates conditionally items of the class Item_func_trig_cond are employed. An object of this class is constructed from an item of class COND and a pointer to a guarding boolean variable. When the value of the guard variable is true the value of the object is the same as the value of the predicate, otherwise it's just returns true. To carry out a return to a nested loop level of join table t the pointer to t is remembered in the field 'return_tab' of the join structure. 
Consider the following query: @code SELECT * FROM t1, LEFT JOIN (t2, t3 LEFT JOIN (t4,t5) ON t5.a=t3.a) ON t4.a=t2.a WHERE (t2.b=5 OR t2.b IS NULL) AND (t4.b=2 OR t4.b IS NULL) @endcode Suppose the chosen execution plan dictates the order t1,t2,t3,t4,t5 and suppose for a given joined rows from tables t1,t2,t3 there are no rows in the result set yet. When first row from t5 that satisfies the on condition t5.a=t3.a is found, the pushed down predicate t4.b=2 OR t4.b IS NULL becomes 'activated', as well the predicate t4.a=t2.a. But the predicate (t2.b=5 OR t2.b IS NULL) can not be checked until t4.a=t2.a becomes true. In order not to re-evaluate the predicates that were already evaluated as attached pushed down predicates, a pointer to the the first most inner unmatched table is maintained in join_tab->first_unmatched. Thus, when the first row from t5 with t5.a=t3.a is found this pointer for t5 is changed from t4 to t2. @par @b STRUCTURE @b NOTES @par join_tab->first_unmatched points always backwards to the first inner table of the embedding nested join, if any. @param join pointer to the structure providing all context info for the query @param join_tab the first next table of the execution plan to be retrieved @param end_records true when we need to perform final steps of retrival @return return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS. */ enum_nested_loop_state sub_select(JOIN *join, QEP_TAB *const qep_tab,bool end_of_records) { DBUG_ENTER("sub_select"); qep_tab->table()->reset_null_row(); if (end_of_records) { enum_nested_loop_state nls= (*qep_tab->next_select)(join,qep_tab+1,end_of_records); DBUG_RETURN(nls); } READ_RECORD *info= &qep_tab->read_record; if (qep_tab->prepare_scan()) DBUG_RETURN(NESTED_LOOP_ERROR); if (qep_tab->starts_weedout()) { do_sj_reset(qep_tab->flush_weedout_table); } const plan_idx qep_tab_idx= qep_tab->idx(); join->return_tab= qep_tab_idx; qep_tab->not_null_compl= true; qep_tab->found_match= false; if (qep_tab->last_inner() != NO_PLAN_IDX) { /* qep_tab is the first inner table for an outer join operation. */ /* Set initial state of guard variables for this table.*/ qep_tab->found= false; /* Set first_unmatched for the last inner table of this group */ QEP_AT(qep_tab, last_inner()).first_unmatched= qep_tab_idx; } if (qep_tab->do_firstmatch() || qep_tab->do_loosescan()) { /* qep_tab is the first table of a LooseScan range, or has a "jump" address in a FirstMatch range. Reset the matching for this round of execution. 
*/ QEP_AT(qep_tab, match_tab).found_match= false; } join->thd->get_stmt_da()->reset_current_row_for_condition(); enum_nested_loop_state rc= NESTED_LOOP_OK; bool in_first_read= true; const bool pfs_batch_update= qep_tab->pfs_batch_update(join); if (pfs_batch_update) qep_tab->table()->file->start_psi_batch_mode(); while (rc == NESTED_LOOP_OK && join->return_tab >= qep_tab_idx) { int error; if (in_first_read) { in_first_read= false; error= (*qep_tab->read_first_record)(qep_tab); } else error= info->read_record(info); DBUG_EXECUTE_IF("bug13822652_1", join->thd->killed= THD::KILL_QUERY;); if (error > 0 || (join->thd->is_error())) // Fatal error rc= NESTED_LOOP_ERROR; else if (error < 0) break; else if (join->thd->killed) // Aborted by user { join->thd->send_kill_message(); rc= NESTED_LOOP_KILLED; } else { if (qep_tab->keep_current_rowid) qep_tab->table()->file->position(qep_tab->table()->record[0]); rc= evaluate_join_record(join, qep_tab); } } if (rc == NESTED_LOOP_OK && qep_tab->last_inner() != NO_PLAN_IDX && !qep_tab->found) rc= evaluate_null_complemented_join_record(join, qep_tab); if (pfs_batch_update) qep_tab->table()->file->end_psi_batch_mode(); DBUG_RETURN(rc); } /** @brief Prepare table to be scanned. @details This function is the place to do any work on the table that needs to be done before table can be scanned. Currently it only materialized derived tables and semi-joined subqueries and binds buffer for current rowid. @returns false - Ok, true - error */ bool QEP_TAB::prepare_scan() { // Check whether materialization is required. if (!materialize_table || materialized) return false; // Materialize table prior to reading it if ((*materialize_table)(this)) return true; materialized= true; // Bind to the rowid buffer managed by the TABLE object. if (copy_current_rowid) copy_current_rowid->bind_buffer(table()->file->ref); return false; } /** SemiJoinDuplicateElimination: Weed out duplicate row combinations SYNPOSIS do_sj_dups_weedout() thd Thread handle sjtbl Duplicate weedout table DESCRIPTION Try storing current record combination of outer tables (i.e. their rowids) in the temporary table. This records the fact that we've seen this record combination and also tells us if we've seen it before. RETURN -1 Error 1 The row combination is a duplicate (discard it) 0 The row combination is not a duplicate (continue) */ int do_sj_dups_weedout(THD *thd, SJ_TMP_TABLE *sjtbl) { int error; SJ_TMP_TABLE::TAB *tab= sjtbl->tabs; SJ_TMP_TABLE::TAB *tab_end= sjtbl->tabs_end; DBUG_ENTER("do_sj_dups_weedout"); if (sjtbl->is_confluent) { if (sjtbl->have_confluent_row) DBUG_RETURN(1); else { sjtbl->have_confluent_row= TRUE; DBUG_RETURN(0); } } uchar *ptr= sjtbl->tmp_table->visible_field_ptr()[0]->ptr; // Put the rowids tuple into table->record[0]: // 1. Store the length if (((Field_varstring*)(sjtbl->tmp_table->visible_field_ptr()[0]))-> length_bytes == 1) { *ptr= (uchar)(sjtbl->rowid_len + sjtbl->null_bytes); ptr++; } else { int2store(ptr, sjtbl->rowid_len + sjtbl->null_bytes); ptr += 2; } // 2. Zero the null bytes uchar *const nulls_ptr= ptr; if (sjtbl->null_bytes) { memset(ptr, 0, sjtbl->null_bytes); ptr += sjtbl->null_bytes; } // 3. 
Put the rowids for (uint i=0; tab != tab_end; tab++, i++) { handler *h= tab->qep_tab->table()->file; if (tab->qep_tab->table()->is_nullable() && tab->qep_tab->table()->has_null_row()) { /* It's a NULL-complemented row */ *(nulls_ptr + tab->null_byte) |= tab->null_bit; memset(ptr + tab->rowid_offset, 0, h->ref_length); } else { /* Copy the rowid value */ memcpy(ptr + tab->rowid_offset, h->ref, h->ref_length); } } if (!check_unique_constraint(sjtbl->tmp_table)) DBUG_RETURN(1); error= sjtbl->tmp_table->file->ha_write_row(sjtbl->tmp_table->record[0]); if (error) { /* If this is a duplicate error, return immediately */ if (sjtbl->tmp_table->file->is_ignorable_error(error)) DBUG_RETURN(1); /* Other error than duplicate error: Attempt to create a temporary table. */ bool is_duplicate; if (create_ondisk_from_heap(thd, sjtbl->tmp_table, sjtbl->start_recinfo, &sjtbl->recinfo, error, TRUE, &is_duplicate)) DBUG_RETURN(-1); DBUG_RETURN(is_duplicate ? 1 : 0); } DBUG_RETURN(0); } /** SemiJoinDuplicateElimination: Reset the temporary table */ static int do_sj_reset(SJ_TMP_TABLE *sj_tbl) { DBUG_ENTER("do_sj_reset"); if (sj_tbl->tmp_table) { int rc= sj_tbl->tmp_table->file->ha_delete_all_rows(); DBUG_RETURN(rc); } sj_tbl->have_confluent_row= FALSE; DBUG_RETURN(0); } /** @brief Process one row of the nested loop join. This function will evaluate parts of WHERE/ON clauses that are applicable to the partial row on hand and in case of success submit this row to the next level of the nested loop. join_tab->return_tab may be modified to cause a return to a previous join_tab. @param join - The join object @param join_tab - The most inner join_tab being processed @return Nested loop state */ static enum_nested_loop_state evaluate_join_record(JOIN *join, QEP_TAB *const qep_tab) { bool not_used_in_distinct= qep_tab->not_used_in_distinct; ha_rows found_records=join->found_records; Item *condition= qep_tab->condition(); const plan_idx qep_tab_idx= qep_tab->idx(); bool found= TRUE; DBUG_ENTER("evaluate_join_record"); DBUG_PRINT("enter", ("join: %p join_tab index: %d table: %s cond: %p", join, static_cast<int>(qep_tab_idx), qep_tab->table()->alias, condition)); if (condition) { found= MY_TEST(condition->val_int()); if (join->thd->killed) { join->thd->send_kill_message(); DBUG_RETURN(NESTED_LOOP_KILLED); } /* check for errors evaluating the condition */ if (join->thd->is_error()) DBUG_RETURN(NESTED_LOOP_ERROR); } if (found) { /* There is no condition on this join_tab or the attached pushed down condition is true => a match is found. */ while (qep_tab->first_unmatched != NO_PLAN_IDX && found) { /* The while condition is always false if join_tab is not the last inner join table of an outer join operation. */ QEP_TAB *first_unmatched= &QEP_AT(qep_tab, first_unmatched); /* Mark that a match for the current row of the outer table is found. This activates WHERE clause predicates attached to the inner tables of the outer join. */ first_unmatched->found= true; for (QEP_TAB *tab= first_unmatched; tab <= qep_tab; tab++) { /* Check all predicates that have just been activated. Actually all predicates non-guarded by first_unmatched->found will be re-evaluated again. It could be fixed, but, probably, it's not worth doing now. not_exists_optimize has been created from a condition containing 'is_null'. This 'is_null' predicate is still present on any 'tab' with 'not_exists_optimize'. Furthermore, the usual rules for condition guards also apply for 'not_exists_optimize' -> When 'is_null==false' we know all cond.
guards are open and we can apply the 'not_exists_optimize'. */ assert(!(tab->table()->reginfo.not_exists_optimize && !tab->condition())); if (tab->condition() && !tab->condition()->val_int()) { /* The condition attached to table tab is false */ if (tab->table()->reginfo.not_exists_optimize) { /* When not_exists_optimizer is set and a matching row is found, the outer row should be excluded from the result set: no need to explore this record, thus we don't call the next_select. And, no need to explore other following records of 'tab', so we set join->return_tab. As we set join_tab->found above, evaluate_join_record() at the upper level will not yield a NULL-complemented record. Note that the calculation below can set return_tab to -1 i.e. PRE_FIRST_PLAN_IDX. */ join->return_tab= qep_tab_idx - 1; DBUG_RETURN(NESTED_LOOP_OK); } if (tab == qep_tab) found= 0; else { /* Set a return point if rejected predicate is attached not to the last table of the current nest level. */ join->return_tab= tab->idx(); DBUG_RETURN(NESTED_LOOP_OK); } } /* check for errors evaluating the condition */ if (join->thd->is_error()) DBUG_RETURN(NESTED_LOOP_ERROR); } /* Check whether join_tab is not the last inner table for another embedding outer join. */ plan_idx f_u= first_unmatched->first_upper(); if (f_u != NO_PLAN_IDX && join->qep_tab[f_u].last_inner() != qep_tab_idx) f_u= NO_PLAN_IDX; qep_tab->first_unmatched= f_u; } plan_idx return_tab= join->return_tab; if (qep_tab->finishes_weedout() && found) { int res= do_sj_dups_weedout(join->thd, qep_tab->check_weed_out_table); if (res == -1) DBUG_RETURN(NESTED_LOOP_ERROR); else if (res == 1) found= FALSE; } else if (qep_tab->do_loosescan() && QEP_AT(qep_tab, match_tab).found_match) { /* Loosescan algorithm requires an access method that gives 'sorted' retrieval of keys, or an access method that provides only one row (which is inherently sorted). EQ_REF and LooseScan may happen if dependencies in subquery (e.g., outer join) prevents table pull-out. */ assert(qep_tab->use_order() || qep_tab->type() == JT_EQ_REF); /* Previous row combination for duplicate-generating range, generated a match. Compare keys of this row and previous row to determine if this is a duplicate that should be skipped. */ if (key_cmp(qep_tab->table()->key_info[qep_tab->index()].key_part, qep_tab->loosescan_buf, qep_tab->loosescan_key_len)) /* Keys do not match. Reset found_match for last table of duplicate-generating range, to avoid comparing keys until a new match has been found. */ QEP_AT(qep_tab, match_tab).found_match= false; else found= false; } /* It was not just a return to lower loop level when one of the newly activated predicates is evaluated as false (See above join->return_tab= tab). */ join->examined_rows++; DBUG_PRINT("counts", ("evaluate_join_record join->examined_rows++: %lu", (ulong) join->examined_rows)); if (found) { enum enum_nested_loop_state rc; // A match is found for the current partial join prefix. qep_tab->found_match= true; rc= (*qep_tab->next_select)(join, qep_tab+1, 0); join->thd->get_stmt_da()->inc_current_row_for_condition(); if (rc != NESTED_LOOP_OK) DBUG_RETURN(rc); /* check for errors evaluating the condition */ if (join->thd->is_error()) DBUG_RETURN(NESTED_LOOP_ERROR); if (qep_tab->do_loosescan() && QEP_AT(qep_tab,match_tab).found_match) { /* A match was found for a duplicate-generating range of a semijoin. Copy key to be able to determine whether subsequent rows will give duplicates that should be skipped. 
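For example, with a semi-join such as SELECT * FROM t1 WHERE t1.a IN (SELECT t2.b FROM t2) executed with LooseScan over an index on t2.b, any further row with the same t2.b value would only duplicate the row combination just produced, so such rows are detected by the key comparison and skipped.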
*/ KEY *key= qep_tab->table()->key_info + qep_tab->index(); key_copy(qep_tab->loosescan_buf, qep_tab->table()->record[0], key, qep_tab->loosescan_key_len); } else if (qep_tab->do_firstmatch() && QEP_AT(qep_tab, match_tab).found_match) { /* We should return to join_tab->firstmatch_return after we have enumerated all the suffixes for current prefix row combination */ set_if_smaller(return_tab, qep_tab->firstmatch_return); } /* Test if this was a SELECT DISTINCT query on a table that was not in the field list; In this case we can abort if we found a row, as no new rows can be added to the result. */ if (not_used_in_distinct && found_records != join->found_records) set_if_smaller(return_tab, qep_tab_idx - 1); set_if_smaller(join->return_tab, return_tab); } else { join->thd->get_stmt_da()->inc_current_row_for_condition(); if (qep_tab->not_null_compl) { /* a NULL-complemented row is not in a table so cannot be locked */ qep_tab->read_record.unlock_row(qep_tab); } } } else { /* The condition pushed down to the table join_tab rejects all rows with the beginning coinciding with the current partial join. */ join->examined_rows++; join->thd->get_stmt_da()->inc_current_row_for_condition(); if (qep_tab->not_null_compl) qep_tab->read_record.unlock_row(qep_tab); } DBUG_RETURN(NESTED_LOOP_OK); } /** @details Construct a NULL-complemented partial join record and feed it to the next level of the nested loop. This function is used in case we have an OUTER join and no matching record was found. */ static enum_nested_loop_state evaluate_null_complemented_join_record(JOIN *join, QEP_TAB *qep_tab) { /* The table join_tab is the first inner table of an outer join operation and no matches have been found for the current outer row. */ QEP_TAB *first_inner_tab= qep_tab; QEP_TAB *last_inner_tab= &QEP_AT(qep_tab, last_inner()); DBUG_ENTER("evaluate_null_complemented_join_record"); for ( ; qep_tab <= last_inner_tab ; qep_tab++) { // Make sure that the rowid buffer is bound, duplicates weedout needs it if (qep_tab->copy_current_rowid && !qep_tab->copy_current_rowid->buffer_is_bound()) qep_tab->copy_current_rowid->bind_buffer(qep_tab->table()->file->ref); /* Change the values of guard predicate variables. */ qep_tab->found= true; qep_tab->not_null_compl= false; /* The outer row is complemented by nulls for each inner table */ restore_record(qep_tab->table(),s->default_values); // Make empty record qep_tab->table()->set_null_row(); // For group by without error if (qep_tab->starts_weedout() && qep_tab > first_inner_tab) { // sub_select() has not performed a reset for this table. do_sj_reset(qep_tab->flush_weedout_table); } /* Check all attached conditions for inner table rows. */ if (qep_tab->condition() && !qep_tab->condition()->val_int()) { if (join->thd->killed) { join->thd->send_kill_message(); DBUG_RETURN(NESTED_LOOP_KILLED); } /* check for errors */ if (join->thd->is_error()) DBUG_RETURN(NESTED_LOOP_ERROR); else DBUG_RETURN(NESTED_LOOP_OK); } } qep_tab= last_inner_tab; /* From the point of view of the rest of execution, this record matches (it has been built and satisfies conditions, no need to do more evaluation on it). See similar code in evaluate_join_record(). */ plan_idx f_u= QEP_AT(qep_tab, first_unmatched).first_upper(); if (f_u != NO_PLAN_IDX && join->qep_tab[f_u].last_inner() != qep_tab->idx()) f_u= NO_PLAN_IDX; qep_tab->first_unmatched= f_u; /* The row complemented by nulls satisfies all conditions attached to inner tables. Finish evaluation of record and send it to be joined with remaining tables.
Note that evaluate_join_record will re-evaluate the condition attached to the last inner table of the current outer join. This is not deemed to have a significant performance impact. */ const enum_nested_loop_state rc= evaluate_join_record(join, qep_tab); for (QEP_TAB *tab= first_inner_tab; tab <= last_inner_tab; tab++) tab->table()->reset_null_row(); DBUG_RETURN(rc); } /***************************************************************************** The different ways to read a record Returns -1 if row was not found, 0 if row was found and 1 on errors *****************************************************************************/ /** Help function when we get some an error from the table handler. */ int report_handler_error(TABLE *table, int error) { if (error == HA_ERR_END_OF_FILE || error == HA_ERR_KEY_NOT_FOUND) { table->status= STATUS_GARBAGE; return -1; // key not found; ok } /* Do not spam the error log with these temporary errors: LOCK_DEADLOCK LOCK_WAIT_TIMEOUT TABLE_DEF_CHANGED Also skip printing to error log if the current thread has been killed. */ if (error != HA_ERR_LOCK_DEADLOCK && error != HA_ERR_LOCK_WAIT_TIMEOUT && error != HA_ERR_TABLE_DEF_CHANGED && !table->in_use->killed) sql_print_error("Got error %d when reading table '%s'", error, table->s->path.str); table->file->print_error(error,MYF(0)); return 1; } int safe_index_read(QEP_TAB *tab) { int error; TABLE *table= tab->table(); if ((error=table->file->ha_index_read_map(table->record[0], tab->ref().key_buff, make_prev_keypart_map(tab->ref().key_parts), HA_READ_KEY_EXACT))) return report_handler_error(table, error); return 0; } /** Reads content of constant table @param tab table @param pos position of table in query plan @retval 0 ok, one row was found or one NULL-complemented row was created @retval -1 ok, no row was found and no NULL-complemented row was created @retval 1 error */ int join_read_const_table(JOIN_TAB *tab, POSITION *pos) { int error; DBUG_ENTER("join_read_const_table"); TABLE *table=tab->table(); table->const_table= true; table->reset_null_row(); table->status= STATUS_GARBAGE | STATUS_NOT_FOUND; if (table->reginfo.lock_type >= TL_WRITE_ALLOW_WRITE) { const enum_sql_command sql_command= tab->join()->thd->lex->sql_command; if (sql_command == SQLCOM_UPDATE_MULTI || sql_command == SQLCOM_DELETE_MULTI) { /* In a multi-UPDATE, if we represent "depends on" with "->", we have: "what columns to read (read_set)" -> "whether table will be updated on-the-fly or with tmp table" -> "whether to-be-updated columns are used by access path" "access path to table (range, ref, scan...)" -> "query execution plan" -> "what tables are const" -> "reading const tables" -> "what columns to read (read_set)". To break this loop, we always read all columns of a constant table if it is going to be updated. Another case is in multi-UPDATE and multi-DELETE, when the table has a trigger: bits of columns needed by the trigger are turned on in result->initialize_tables(), which has not yet been called when we do the reading now, so we must read all columns. 
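As an illustration, in a statement like UPDATE t1, t2 SET t1.a = t2.a WHERE t1.pk = 1 AND t2.b = t1.b, t1 is a const table (single-row primary key lookup) that is also updated, so all of its columns are read here.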
*/ bitmap_set_all(table->read_set); /* Virtual generated columns must be writable */ for (Field **vfield_ptr= table->vfield; vfield_ptr && *vfield_ptr; vfield_ptr++) bitmap_set_bit(table->write_set, (*vfield_ptr)->field_index); table->file->column_bitmaps_signal(); } } if (tab->type() == JT_SYSTEM) error= read_system(table); else { if (!table->key_read && table->covering_keys.is_set(tab->ref().key) && !table->no_keyread && (int) table->reginfo.lock_type <= (int) TL_READ_HIGH_PRIORITY) { table->set_keyread(TRUE); tab->set_index(tab->ref().key); } error= read_const(table, &tab->ref()); table->set_keyread(FALSE); } if (error) { /* Mark for EXPLAIN that the row was not found */ pos->filter_effect= 1.0; pos->rows_fetched= 0.0; pos->prefix_rowcount= 0.0; pos->ref_depend_map= 0; if (!tab->table_ref->outer_join || error > 0) DBUG_RETURN(error); } if (tab->join_cond() && !table->has_null_row()) { // We cannot handle outer-joined tables with expensive join conditions here: assert(!tab->join_cond()->is_expensive()); if (tab->join_cond()->val_int() == 0) table->set_null_row(); } /* Check appearance of new constant items in Item_equal objects */ JOIN *const join= tab->join(); THD *const thd= join->thd; if (join->where_cond && update_const_equal_items(thd, join->where_cond, tab)) DBUG_RETURN(1); TABLE_LIST *tbl; for (tbl= join->select_lex->leaf_tables; tbl; tbl= tbl->next_leaf) { TABLE_LIST *embedded; TABLE_LIST *embedding= tbl; do { embedded= embedding; if (embedded->join_cond_optim() && update_const_equal_items(thd, embedded->join_cond_optim(), tab)) DBUG_RETURN(1); embedding= embedded->embedding; } while (embedding && embedding->nested_join->join_list.head() == embedded); } DBUG_RETURN(0); } /** Read a constant table when there is at most one matching row, using a table scan. @param table Table to read @retval 0 Row was found @retval -1 Row was not found @retval 1 Got an error (other than row not found) during read */ static int read_system(TABLE *table) { int error; if (table->status & STATUS_GARBAGE) // If first read { if ((error=table->file->read_first_row(table->record[0], table->s->primary_key))) { if (error != HA_ERR_END_OF_FILE) return report_handler_error(table, error); table->set_null_row(); empty_record(table); // Make empty record return -1; } store_record(table,record[1]); } else if (!table->status) // Only happens with left join restore_record(table,record[1]); // restore old record table->reset_null_row(); return table->status ? -1 : 0; } /** Read a constant table when there is at most one matching row, using an index lookup. 
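This implements the "const" access method: the row is located through an equality lookup on a unique (or primary key) index using constant values, for example WHERE pk_col = 7, and is then reused as a constant for the rest of the statement.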
@param tab Table to read @retval 0 Row was found @retval -1 Row was not found @retval 1 Got an error (other than row not found) during read */ static int join_read_const(QEP_TAB *tab) { return read_const(tab->table(), &tab->ref()); } static int read_const(TABLE *table, TABLE_REF *ref) { int error; DBUG_ENTER("read_const"); if (table->status & STATUS_GARBAGE) // If first read { table->status= 0; if (cp_buffer_from_ref(table->in_use, table, ref)) error=HA_ERR_KEY_NOT_FOUND; else { error=table->file->ha_index_read_idx_map(table->record[0],ref->key, ref->key_buff, make_prev_keypart_map(ref->key_parts), HA_READ_KEY_EXACT); } if (error) { table->status= STATUS_NOT_FOUND; table->set_null_row(); empty_record(table); if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) { const int ret= report_handler_error(table, error); DBUG_RETURN(ret); } DBUG_RETURN(-1); } store_record(table,record[1]); } else if (!(table->status & ~STATUS_NULL_ROW)) // Only happens with left join { table->status=0; restore_record(table,record[1]); // restore old record } table->reset_null_row(); DBUG_RETURN(table->status ? -1 : 0); } /** Read row using unique key: eq_ref access method implementation @details This is the "read_first" function for the eq_ref access method. The difference from ref access function is that it has a one-element lookup cache (see cmp_buffer_with_ref) @param tab JOIN_TAB of the accessed table @retval 0 - Ok @retval -1 - Row not found @retval 1 - Error */ static int join_read_key(QEP_TAB *tab) { TABLE *const table= tab->table(); TABLE_REF *table_ref= &tab->ref(); int error; if (!table->file->inited) { /* Disable caching for inner table of outer join, since setting the NULL property on the table will overwrite NULL bits and hence destroy the current row for later use as a cached row. */ if (tab->table_ref->is_inner_table_of_outer_join()) table_ref->disable_cache= true; assert(!tab->use_order()); //Don't expect sort req. for single row. if ((error= table->file->ha_index_init(table_ref->key, tab->use_order()))) { (void) report_handler_error(table, error); return 1; } } /* We needn't do "Late NULLs Filtering" because eq_ref is restricted to indices on NOT NULL columns (see create_ref_for_key()). */ if (cmp_buffer_with_ref(tab->join()->thd, table, table_ref) || (table->status & (STATUS_GARBAGE | STATUS_NULL_ROW))) { if (table_ref->key_err) { table->status=STATUS_NOT_FOUND; return -1; } /* Moving away from the current record. Unlock the row in the handler if it did not match the partial WHERE. */ if (table_ref->has_record && table_ref->use_count == 0) { table->file->unlock_row(); table_ref->has_record= FALSE; } error= table->file->ha_index_read_map(table->record[0], table_ref->key_buff, make_prev_keypart_map(table_ref->key_parts), HA_READ_KEY_EXACT); if (error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) return report_handler_error(table, error); if (! error) { table_ref->has_record= TRUE; table_ref->use_count= 1; } } else if (table->status == 0) { assert(table_ref->has_record); table_ref->use_count++; } table->reset_null_row(); return table->status ? -1 : 0; } /** Since join_read_key may buffer a record, do not unlock it if it was not used in this invocation of join_read_key(). Only count locks, thus remembering if the record was left unused, and unlock already when pruning the current value of TABLE_REF buffer. 
@sa join_read_key() */ void join_read_key_unlock_row(QEP_TAB *tab) { assert(tab->ref().use_count); if (tab->ref().use_count) tab->ref().use_count--; } /** Rows from const tables are read once but potentially used multiple times during execution of a query. Ensure such rows are never unlocked during query execution. */ void join_const_unlock_row(QEP_TAB *tab) { assert(tab->type() == JT_CONST); } /** Read a table *assumed* to be included in execution of a pushed join. This is the counterpart of join_read_key() / join_read_always_key() for child tables in a pushed join. When the table access is performed as part of the pushed join, all 'linked' child columns are prefetched together with the parent row. The handler will then only format the row as required by MySQL and set 'table->status' accordingly. However, there may be situations where the prepared pushed join was not executed as assumed. It is the responsibility of the handler to handle these situations by letting ::index_read_pushed() then effectively do a plain old index_read_map(..., HA_READ_KEY_EXACT); @param tab Table to read @retval 0 Row was found @retval -1 Row was not found @retval 1 Got an error (other than row not found) during read */ static int join_read_linked_first(QEP_TAB *tab) { int error; TABLE *table= tab->table(); DBUG_ENTER("join_read_linked_first"); assert(!tab->use_order()); // Pushed child can't be sorted if (!table->file->inited && (error= table->file->ha_index_init(tab->ref().key, tab->use_order()))) { (void) report_handler_error(table, error); DBUG_RETURN(error); } /* Perform "Late NULLs Filtering" (see internals manual for explanations) */ if (tab->ref().impossible_null_ref()) { DBUG_PRINT("info", ("join_read_linked_first null_rejected")); DBUG_RETURN(-1); } if (cp_buffer_from_ref(tab->join()->thd, table, &tab->ref())) { table->status=STATUS_NOT_FOUND; DBUG_RETURN(-1); } // 'read' itself is a NOOP: // handler::index_read_pushed() only unpacks the prefetched row and sets 'status' error=table->file->index_read_pushed(table->record[0], tab->ref().key_buff, make_prev_keypart_map(tab->ref().key_parts)); if (unlikely(error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)) DBUG_RETURN(report_handler_error(table, error)); table->reset_null_row(); int rc= table->status ? -1 : 0; DBUG_RETURN(rc); } static int join_read_linked_next(READ_RECORD *info) { TABLE *table= info->table; DBUG_ENTER("join_read_linked_next"); int error=table->file->index_next_pushed(table->record[0]); if (error) { if (unlikely(error != HA_ERR_END_OF_FILE)) DBUG_RETURN(report_handler_error(table, error)); table->status= STATUS_GARBAGE; DBUG_RETURN(-1); } DBUG_RETURN(error); } /* ref access method implementation: "read_first" function SYNOPSIS join_read_always_key() tab JOIN_TAB of the accessed table DESCRIPTION This is the "read_first" function for the "ref" access method. The function must leave the index initialized when it returns. ref_or_null access implementation depends on that.
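A typical (illustrative) case is ref access for SELECT * FROM t1 WHERE key_col = expr: for each row combination of the preceding tables the lookup key is rebuilt from 'expr', the first matching index entry is fetched here, and join_read_next_same() then continues within the same key value.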
RETURN 0 - Ok -1 - Row not found 1 - Error */ static int join_read_always_key(QEP_TAB *tab) { int error; TABLE *table= tab->table(); /* Initialize the index first */ if (!table->file->inited && (error= table->file->ha_index_init(tab->ref().key, tab->use_order()))) { (void) report_handler_error(table, error); return 1; } /* Perform "Late NULLs Filtering" (see internals manual for explanations) */ TABLE_REF *ref= &tab->ref(); if (ref->impossible_null_ref()) { DBUG_PRINT("info", ("join_read_always_key null_rejected")); return -1; } if (cp_buffer_from_ref(tab->join()->thd, table, ref)) return -1; if ((error= table->file->prepare_index_key_scan_map(tab->ref().key_buff, make_prev_keypart_map(tab->ref().key_parts)))) return report_handler_error(table, error); if ((error= table->file->ha_index_read_map(table->record[0], tab->ref().key_buff, make_prev_keypart_map(tab->ref().key_parts), HA_READ_KEY_EXACT))) { if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) return report_handler_error(table, error); return -1; /* purecov: inspected */ } return 0; } /** This function is used when optimizing away ORDER BY in SELECT * FROM t1 WHERE a=1 ORDER BY a DESC,b DESC. */ int join_read_last_key(QEP_TAB *tab) { int error; TABLE *table= tab->table(); if (!table->file->inited && (error= table->file->ha_index_init(tab->ref().key, tab->use_order()))) { (void) report_handler_error(table, error); return 1; } if (cp_buffer_from_ref(tab->join()->thd, table, &tab->ref())) return -1; if ((error=table->file->ha_index_read_last_map(table->record[0], tab->ref().key_buff, make_prev_keypart_map(tab->ref().key_parts)))) { if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) return report_handler_error(table, error); return -1; /* purecov: inspected */ } return 0; } /* ARGSUSED */ static int join_no_more_records(READ_RECORD *info MY_ATTRIBUTE((unused))) { return -1; } static int join_read_next_same(READ_RECORD *info) { int error; TABLE *table= info->table; QEP_TAB *tab=table->reginfo.qep_tab; if ((error= table->file->ha_index_next_same(table->record[0], tab->ref().key_buff, tab->ref().key_length))) { if (error != HA_ERR_END_OF_FILE) return report_handler_error(table, error); table->status= STATUS_GARBAGE; return -1; } return 0; } int join_read_prev_same(READ_RECORD *info) { int error; TABLE *table= info->table; QEP_TAB *tab=table->reginfo.qep_tab; /* Using ha_index_prev() for reading records from the table can cause performance issues if used in combination with ICP. The ICP code in the storage engine does not know when to stop reading from the index and a call to ha_index_prev() might cause the storage engine to read to the beginning of the index if no qualifying record is found. */ assert(table->file->pushed_idx_cond == NULL); if ((error= table->file->ha_index_prev(table->record[0]))) return report_handler_error(table, error); if (key_cmp_if_same(table, tab->ref().key_buff, tab->ref().key, tab->ref().key_length)) { table->status=STATUS_NOT_FOUND; error= -1; } return error; } int join_init_quick_read_record(QEP_TAB *tab) { /* This is for QS_DYNAMIC_RANGE, i.e., "Range checked for each record". The trace for the range analysis below this point will be printed with different ranges for every record to the left of this table in the join. 
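An illustrative case is SELECT * FROM t1, t2 WHERE t2.key_col > t1.col: the useful range on t2.key_col depends on the current row of t1, so a fresh range analysis (and possibly a new QUICK select) is performed for every outer row.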
*/ THD *const thd= tab->join()->thd; #ifdef OPTIMIZER_TRACE Opt_trace_context * const trace= &thd->opt_trace; const bool disable_trace= tab->quick_traced_before && !trace->feature_enabled(Opt_trace_context::DYNAMIC_RANGE); Opt_trace_disable_I_S disable_trace_wrapper(trace, disable_trace); tab->quick_traced_before= true; Opt_trace_object wrapper(trace); Opt_trace_object trace_table(trace, "rows_estimation_per_outer_row"); trace_table.add_utf8_table(tab->table_ref); #endif /* If this join tab was read through a QUICK for the last record combination from earlier tables, deleting that quick will close the index. Otherwise, we need to close the index before the next join iteration starts because the handler object might be reused by a different access strategy. */ if (!tab->quick() && (tab->table()->file->inited != handler::NONE)) tab->table()->file->ha_index_or_rnd_end(); key_map needed_reg_dummy; QUICK_SELECT_I *old_qck= tab->quick(); QUICK_SELECT_I *qck; DEBUG_SYNC(thd, "quick_not_created"); const int rc= test_quick_select(thd, tab->keys(), 0, // empty table map HA_POS_ERROR, false, // don't force quick range ORDER::ORDER_NOT_RELEVANT, tab, tab->condition(), &needed_reg_dummy, &qck, tab->table()->force_index); assert(old_qck == NULL || old_qck != qck); tab->set_quick(qck); /* EXPLAIN CONNECTION is used to understand why a query is currently taking so much time. So it makes sense to show what the execution is doing now: is it a table scan or a range scan? If a range scan, on which index? So: below we want to change the type and quick visible in EXPLAIN, and for that, we need to take mutex and change type and quick_optim. */ DEBUG_SYNC(thd, "quick_created_before_mutex"); thd->lock_query_plan(); tab->set_type(qck ? calc_join_type(qck->get_type()) : JT_ALL); tab->set_quick_optim(); thd->unlock_query_plan(); delete old_qck; DEBUG_SYNC(thd, "quick_droped_after_mutex"); return (rc == -1) ? -1 : /* No possible records */ join_init_read_record(tab); } int read_first_record_seq(QEP_TAB *tab) { if (tab->read_record.table->file->ha_rnd_init(1)) return 1; return (*tab->read_record.read_record)(&tab->read_record); } /** @brief Prepare table for reading rows and read first record. @details Prior to reading the table, the following tasks are done (in order of execution): .) derived tables are materialized .) duplicates are removed (tmp tables only) .) the table is sorted with filesort (both non-tmp and tmp tables) After this has been done, this function resets the quick select, if present, sets up the table reading functions, and reads the first record. @retval 0 Ok @retval -1 End of records @retval 1 Error */ int join_init_read_record(QEP_TAB *tab) { int error; if (tab->distinct && tab->remove_duplicates()) // Remove duplicates. return 1; if (tab->filesort && tab->sort_table()) // Sort table. return 1; if (tab->quick() && (error= tab->quick()->reset())) { /* Ensures error status is propagated back to client */ report_handler_error(tab->table(), error); return 1; } if (init_read_record(&tab->read_record, tab->join()->thd, NULL, tab, 1, 1, FALSE)) return 1; return (*tab->read_record.read_record)(&tab->read_record); } /* This helper function materializes a derived table/view and then calls the read_first_record function to set up access to the materialized table.
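For example (illustrative only), in SELECT * FROM (SELECT a, COUNT(*) FROM t1 GROUP BY a) AS dt JOIN t2 ON dt.a = t2.a, the derived table dt is filled here on first access, unless the optimizer has already materialized it as a constant (see the materializable_is_const() check below).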
*/ int join_materialize_derived(QEP_TAB *tab) { THD *const thd= tab->table()->in_use; TABLE_LIST *const derived= tab->table_ref; assert(derived->uses_materialization() && !tab->materialized); if (derived->materializable_is_const()) // Has been materialized by optimizer return NESTED_LOOP_OK; bool res= derived->materialize_derived(thd); res|= derived->cleanup_derived(); DEBUG_SYNC(thd, "after_materialize_derived"); return res ? NESTED_LOOP_ERROR : NESTED_LOOP_OK; } /* Helper function for materialization of a semi-joined subquery. @param tab JOIN_TAB referencing a materialized semi-join table @return Nested loop state */ int join_materialize_semijoin(QEP_TAB *tab) { DBUG_ENTER("join_materialize_semijoin"); Semijoin_mat_exec *const sjm= tab->sj_mat_exec(); QEP_TAB *const first= tab->join()->qep_tab + sjm->inner_table_index; QEP_TAB *const last= first + (sjm->table_count - 1); /* Set up the end_sj_materialize function after the last inner table, so that generated rows are inserted into the materialized table. */ last->next_select= end_sj_materialize; last->set_sj_mat_exec(sjm); // TODO: This violates comment for sj_mat_exec! if (tab->table()->hash_field) tab->table()->file->ha_index_init(0, 0); int rc; if ((rc= sub_select(tab->join(), first, false)) < 0) DBUG_RETURN(rc); if ((rc= sub_select(tab->join(), first, true)) < 0) DBUG_RETURN(rc); if (tab->table()->hash_field) tab->table()->file->ha_index_or_rnd_end(); last->next_select= NULL; last->set_sj_mat_exec(NULL); #if !defined(NDEBUG) || defined(HAVE_VALGRIND) // Fields of inner tables should not be read anymore: for (QEP_TAB *t= first; t <= last; t++) { TABLE *const inner_table= t->table(); TRASH(inner_table->record[0], inner_table->s->reclength); } #endif DBUG_RETURN(NESTED_LOOP_OK); } /** Check if access to this JOIN_TAB has to retrieve rows in sorted order as defined by the ordered index used to access this table. */ bool QEP_TAB::use_order() const { /* No need to require sorted access for single row reads being performed by const- or EQ_REF-accessed tables. */ if (type() == JT_EQ_REF || type() == JT_CONST || type() == JT_SYSTEM) return false; /* First non-const table requires sorted results if ORDER or GROUP BY use ordered index. */ if ((uint)idx() == join()->const_tables && join()->ordered_index_usage != JOIN::ordered_index_void) return true; /* LooseScan strategy for semijoin requires sorted results even if final result is not to be sorted. */ if (position()->sj_strategy == SJ_OPT_LOOSE_SCAN) return true; /* Fall through: Results don't have to be sorted */ return false; } /* Helper function for sorting table with filesort. */ bool QEP_TAB::sort_table() { DBUG_PRINT("info",("Sorting for index")); THD_STAGE_INFO(join()->thd, stage_creating_sort_index); assert(join()->ordered_index_usage != (filesort->order == join()->order ? JOIN::ordered_index_order_by : JOIN::ordered_index_group_by)); const bool rc= create_sort_index(join()->thd, join(), this) != 0; /* Filesort has filtered rows already (see skip_record() in find_all_keys()): so we can simply scan the cache, so have to set quick=NULL. But if we do this, we still need to delete the quick, now or later. We cannot do it now: the dtor of quick_index_merge would do free_io_cache, but the cache has to remain, because scan will read from it. So we delay deletion: we just let the "quick" continue existing in "quick_optim"; double benefit: - EXPLAIN will show the "quick_optim" - it will be deleted late enough. 
*/ set_quick(NULL); set_condition(NULL); return rc; } int join_read_first(QEP_TAB *tab) { int error; TABLE *table=tab->table(); if (table->covering_keys.is_set(tab->index()) && !table->no_keyread) table->set_keyread(TRUE); table->status=0; tab->read_record.table=table; tab->read_record.record=table->record[0]; tab->read_record.read_record=join_read_next; if (!table->file->inited && (error= table->file->ha_index_init(tab->index(), tab->use_order()))) { (void) report_handler_error(table, error); return 1; } if ((error= table->file->prepare_index_scan())) { report_handler_error(table, error); return 1; } if ((error= table->file->ha_index_first(tab->table()->record[0]))) { if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) report_handler_error(table, error); return -1; } return 0; } static int join_read_next(READ_RECORD *info) { int error; if ((error= info->table->file->ha_index_next(info->record))) return report_handler_error(info->table, error); return 0; } int join_read_last(QEP_TAB *tab) { TABLE *table=tab->table(); int error; if (table->covering_keys.is_set(tab->index()) && !table->no_keyread) table->set_keyread(TRUE); table->status=0; tab->read_record.read_record=join_read_prev; tab->read_record.table=table; tab->read_record.record=table->record[0]; if (!table->file->inited && (error= table->file->ha_index_init(tab->index(), tab->use_order()))) { (void) report_handler_error(table, error); return 1; } if ((error= table->file->prepare_index_scan())) return report_handler_error(table, error); if ((error= table->file->ha_index_last(table->record[0]))) return report_handler_error(table, error); return 0; } static int join_read_prev(READ_RECORD *info) { int error; if ((error= info->table->file->ha_index_prev(info->record))) return report_handler_error(info->table, error); return 0; } static int join_ft_read_first(QEP_TAB *tab) { int error; TABLE *table= tab->table(); if (!table->file->inited && (error= table->file->ha_index_init(tab->ref().key, tab->use_order()))) { (void) report_handler_error(table, error); return 1; } table->file->ft_init(); if ((error= table->file->ft_read(table->record[0]))) return report_handler_error(table, error); return 0; } static int join_ft_read_next(READ_RECORD *info) { int error; if ((error= info->table->file->ft_read(info->table->record[0]))) return report_handler_error(info->table, error); return 0; } /** Reading of key with key reference and one part that may be NULL. */ static int join_read_always_key_or_null(QEP_TAB *tab) { int res; /* First read according to key which is NOT NULL */ *tab->ref().null_ref_key= 0; // Clear null byte if ((res= join_read_always_key(tab)) >= 0) return res; /* Then read key with null value */ *tab->ref().null_ref_key= 1; // Set null byte return safe_index_read(tab); } static int join_read_next_same_or_null(READ_RECORD *info) { int error; if ((error= join_read_next_same(info)) >= 0) return error; QEP_TAB *tab= info->table->reginfo.qep_tab; /* Test if we have already done a read after null key */ if (*tab->ref().null_ref_key) return -1; // All keys read *tab->ref().null_ref_key= 1; // Set null byte return safe_index_read(tab); // then read null keys } /** Pick the appropriate access method functions Sets the functions for the selected table access method @param join_tab JOIN_TAB for this QEP_TAB @todo join_init_read_record/join_read_(last|first) set tab->read_record.read_record internally. Do the same in other first record reading functions. 
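@note For reference, the switch below maps e.g. JT_EQ_REF to join_read_key()/join_no_more_records(), JT_REF to join_read_always_key()/join_read_next_same() (or their reversed variants), and table/range scans to join_init_read_record() or join_init_quick_read_record().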
*/ void QEP_TAB::pick_table_access_method(const JOIN_TAB *join_tab) { ASSERT_BEST_REF_IN_JOIN_ORDER(join()); assert(join_tab == join()->best_ref[idx()]); assert(table()); assert(read_first_record == NULL); // Only some access methods support reversed access: assert(!join_tab->reversed_access || type() == JT_REF || type() == JT_INDEX_SCAN); // Fall through to set default access functions: switch (type()) { case JT_REF: if (join_tab->reversed_access) { read_first_record= join_read_last_key; read_record.read_record= join_read_prev_same; } else { read_first_record= join_read_always_key; read_record.read_record= join_read_next_same; } break; case JT_REF_OR_NULL: read_first_record= join_read_always_key_or_null; read_record.read_record= join_read_next_same_or_null; break; case JT_CONST: read_first_record= join_read_const; read_record.read_record= join_no_more_records; read_record.unlock_row= join_const_unlock_row; break; case JT_EQ_REF: read_first_record= join_read_key; read_record.read_record= join_no_more_records; read_record.unlock_row= join_read_key_unlock_row; break; case JT_FT: read_first_record= join_ft_read_first; read_record.read_record= join_ft_read_next; break; case JT_INDEX_SCAN: read_first_record= join_tab->reversed_access ? join_read_last : join_read_first; break; case JT_ALL: case JT_RANGE: case JT_INDEX_MERGE: read_first_record= (join_tab->use_quick == QS_DYNAMIC_RANGE) ? join_init_quick_read_record : join_init_read_record; break; default: assert(0); break; } } /** Install the appropriate 'linked' access method functions if this part of the join have been converted to pushed join. */ void QEP_TAB::set_pushed_table_access_method(void) { DBUG_ENTER("set_pushed_table_access_method"); assert(table()); /** Setup modified access function for children of pushed joins. */ const TABLE *pushed_root= table()->file->root_of_pushed_join(); if (pushed_root && pushed_root != table()) { /** Is child of a pushed join operation: Replace access functions with its linked counterpart. ... Which is effectively a NOOP as the row is already fetched together with the root of the linked operation. */ DBUG_PRINT("info", ("Modifying table access method for '%s'", table()->s->table_name.str)); assert(type() != JT_REF_OR_NULL); read_first_record= join_read_linked_first; read_record.read_record= join_read_linked_next; // Use the default unlock_row function read_record.unlock_row = rr_unlock_row; } DBUG_VOID_RETURN; } /***************************************************************************** DESCRIPTION Functions that end one nested loop iteration. Different functions are used to support GROUP BY clause and to redirect records to a table (e.g. in case of SELECT into a temporary table) or to the network client. See the enum_nested_loop_state enumeration for the description of return values. *****************************************************************************/ /* ARGSUSED */ static enum_nested_loop_state end_send(JOIN *join, QEP_TAB *qep_tab, bool end_of_records) { DBUG_ENTER("end_send"); /* When all tables are const this function is called with jointab == NULL. This function shouldn't be called for the first join_tab as it needs to get fields from previous tab. Note that qep_tab may be one past the last of qep_tab! So don't read its pointed content. But you can read qep_tab[-1] then. */ assert(qep_tab == NULL || qep_tab > join->qep_tab); //TODO pass fields via argument List *fields= qep_tab ? 
qep_tab[-1].fields : join->fields; if (!end_of_records) { int error; if (join->tables && // In case filesort has been used and zeroed quick(): (join->qep_tab[0].quick_optim() && join->qep_tab[0].quick_optim()->is_loose_index_scan())) { // Copy non-aggregated fields when loose index scan is used. if (copy_fields(&join->tmp_table_param, join->thd)) DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ } // Use JOIN's HAVING for the case of tableless SELECT. if (join->having_cond && join->having_cond->val_int() == 0) DBUG_RETURN(NESTED_LOOP_OK); // Didn't match having error=0; if (join->do_send_rows) error= join->select_lex->query_result()->send_data(*fields); if (error) DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ ++join->send_records; if (join->send_records >= join->unit->select_limit_cnt && !join->do_send_rows) { /* If we have used Priority Queue for optimizing order by with limit, then stop here, there are no more records to consume. When this optimization is used, end_send is called on the next join_tab. */ if (join->order && join->calc_found_rows && qep_tab > join->qep_tab && qep_tab[-1].filesort && qep_tab[-1].filesort->using_pq) { DBUG_PRINT("info", ("filesort NESTED_LOOP_QUERY_LIMIT")); DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT); } } if (join->send_records >= join->unit->select_limit_cnt && join->do_send_rows) { if (join->calc_found_rows) { QEP_TAB *first= &join->qep_tab[0]; if ((join->primary_tables == 1) && !join->sort_and_group && !join->send_group_parts && !join->having_cond && !first->condition() && !(first->quick()) && (first->table()->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) && (first->ref().key < 0)) { /* Join over all rows in table; Return number of found rows */ TABLE *table= first->table(); if (table->sort.has_filesort_result()) { /* Using filesort */ join->send_records= table->sort.found_records; } else { table->file->info(HA_STATUS_VARIABLE); join->send_records= table->file->stats.records; } } else { join->do_send_rows= 0; if (join->unit->fake_select_lex) join->unit->fake_select_lex->select_limit= 0; DBUG_RETURN(NESTED_LOOP_OK); } } DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT); // Abort nicely } else if (join->send_records >= join->fetch_limit) { /* There is a server side cursor and all rows for this fetch request are sent. */ DBUG_RETURN(NESTED_LOOP_CURSOR_LIMIT); } } DBUG_RETURN(NESTED_LOOP_OK); } /* ARGSUSED */ enum_nested_loop_state end_send_group(JOIN *join, QEP_TAB *qep_tab, bool end_of_records) { int idx= -1; enum_nested_loop_state ok_code= NESTED_LOOP_OK; List *fields= qep_tab ? qep_tab[-1].fields : join->fields; DBUG_ENTER("end_send_group"); if (!join->items3.is_null() && !join->set_group_rpa) { join->set_group_rpa= true; join->set_items_ref_array(join->items3); } if (!join->first_record || end_of_records || (idx=test_if_item_cache_changed(join->group_fields)) >= 0) { if (!join->group_sent && (join->first_record || (end_of_records && !join->grouped && !join->group_optimized_away))) { if (idx < (int) join->send_group_parts) { int error=0; { table_map save_nullinfo= 0; if (!join->first_record) { /* If this is a subquery, we need to save and later restore the const table NULL info before clearing the tables because the following executions of the subquery do not reevaluate constant fields. 
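This branch also produces the aggregate row for an empty result; for example (illustrative only), SELECT COUNT(*), MAX(a) FROM t1 WHERE 0 must still return one row (0, NULL), which is why no_rows_in_result() is called for each item and join->clear() marks the tables as NULL-complemented below.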
@see save_const_null_info and restore_const_null_info */ if (join->select_lex->master_unit()->item && join->const_tables) save_const_null_info(join, &save_nullinfo); // Calculate aggregate functions for no rows List_iterator_fast it(*fields); Item *item; while ((item= it++)) item->no_rows_in_result(); // Mark tables as containing only NULL values if (join->clear()) DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ } if (join->having_cond && join->having_cond->val_int() == 0) error= -1; // Didn't satisfy having else { if (join->do_send_rows) error= join->select_lex->query_result()->send_data(*fields); join->send_records++; join->group_sent= true; } if (join->rollup.state != ROLLUP::STATE_NONE && error <= 0) { if (join->rollup_send_data((uint) (idx+1))) error= 1; } if (save_nullinfo) restore_const_null_info(join, save_nullinfo); } if (error > 0) DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ if (end_of_records) DBUG_RETURN(NESTED_LOOP_OK); if (join->send_records >= join->unit->select_limit_cnt && join->do_send_rows) { if (!join->calc_found_rows) DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT); // Abort nicely join->do_send_rows=0; join->unit->select_limit_cnt = HA_POS_ERROR; } else if (join->send_records >= join->fetch_limit) { /* There is a server side cursor and all rows for this fetch request are sent. */ /* Preventing code duplication. When finished with the group reset the group functions and copy_fields. We fall through. bug #11904 */ ok_code= NESTED_LOOP_CURSOR_LIMIT; } } } else { if (end_of_records) DBUG_RETURN(NESTED_LOOP_OK); join->first_record=1; (void)(test_if_item_cache_changed(join->group_fields)); } if (idx < (int) join->send_group_parts) { /* This branch is executed also for cursors which have finished their fetch limit - the reason for ok_code. */ if (copy_fields(&join->tmp_table_param, join->thd)) DBUG_RETURN(NESTED_LOOP_ERROR); if (init_sum_functions(join->sum_funcs, join->sum_funcs_end[idx+1])) DBUG_RETURN(NESTED_LOOP_ERROR); join->group_sent= false; DBUG_RETURN(ok_code); } } if (update_sum_func(join->sum_funcs)) DBUG_RETURN(NESTED_LOOP_ERROR); DBUG_RETURN(NESTED_LOOP_OK); } static bool cmp_field_value(Field *field, my_ptrdiff_t diff) { assert(field); /* Records are different when: 1) NULL flags aren't the same 2) length isn't the same 3) data isn't the same */ const bool value1_isnull= field->is_real_null(); const bool value2_isnull= field->is_real_null(diff); if (value1_isnull != value2_isnull) // 1 return true; if (value1_isnull) return false; // Both values are null, no need to proceed. const size_t value1_length= field->data_length(); const size_t value2_length= field->data_length(diff); if (field->type() == MYSQL_TYPE_JSON) { Field_json *json_field= down_cast(field); // Fetch the JSON value on the left side of the comparison. Json_wrapper left_wrapper; if (json_field->val_json(&left_wrapper)) return true; /* purecov: inspected */ // Fetch the JSON value on the right side of the comparison. 
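// The field's ptr is temporarily shifted by 'diff' below so that val_json() reads the value from record[1]; it is restored immediately after the read.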
Json_wrapper right_wrapper; json_field->ptr+= diff; bool err= json_field->val_json(&right_wrapper); json_field->ptr-= diff; if (err) return true; /* purecov: inspected */ return (left_wrapper.compare(right_wrapper) != 0); } // Trailing space can't be skipped and length is different if (!field->is_text_key_type() && value1_length != value2_length) // 2 return true; if (field->cmp_max(field->ptr, field->ptr + diff, // 3 std::max(value1_length, value2_length))) return true; return false; } /** Compare GROUP BY fields from tmp table's record[0] and record[1] @returns true records are different false records are the same */ bool group_rec_cmp(ORDER *group, uchar *rec0, uchar *rec1) { my_ptrdiff_t diff= rec1 - rec0; for (ORDER *grp= group; grp; grp= grp->next) { Item *item= *(grp->item); Field *field= item->get_tmp_table_field(); if (cmp_field_value(field, diff)) return true; } return false; } /** Compare all visible fields from tmp table's record[0] and record[1] @returns true records are different false records are the same */ bool table_rec_cmp(TABLE *table) { my_ptrdiff_t diff= table->record[1] - table->record[0]; Field **fields= table->visible_field_ptr(); for (uint i= 0; i < table->visible_field_count() ; i++) { Field *field= fields[i]; if (cmp_field_value(field, diff)) return true; } return false; } /** Generate hash for a field @returns generated hash */ ulonglong unique_hash(Field *field, ulonglong *hash_val) { uchar *pos, *end; ulong seed1=0, seed2= 4; ulonglong crc= *hash_val; if (field->is_null()) { /* Change crc in a way different from an empty string or 0. (This is an optimisation; the code will work even if this isn't done) */ crc=((crc << 8) + 511+ (crc >> (8*sizeof(ha_checksum)-8))); goto finish; } field->get_ptr(&pos); end= pos + field->data_length(); if (field->type() == MYSQL_TYPE_JSON) { Field_json *json_field= down_cast<Field_json *>(field); crc= json_field->make_hash_key(hash_val); } else if (field->key_type() == HA_KEYTYPE_TEXT || field->key_type() == HA_KEYTYPE_VARTEXT1 || field->key_type() == HA_KEYTYPE_VARTEXT2) { field->charset()->coll->hash_sort(field->charset(), (const uchar*) pos, field->data_length(), &seed1, &seed2); crc^= seed1; } else while (pos != end) crc=((crc << 8) + (((uchar) *(uchar*) pos++))) + (crc >> (8*sizeof(ha_checksum)-8)); finish: *hash_val= crc; return crc; } /* Generate hash for unique constraint according to group-by list */ ulonglong unique_hash_group(ORDER *group) { ulonglong crc= 0; Field *field; for (ORDER *ord= group; ord ; ord= ord->next) { Item *item= *(ord->item); field= item->get_tmp_table_field(); assert(field); unique_hash(field, &crc); } return crc; } /* Generate hash for unique_constraint for all visible fields of a table */ ulonglong unique_hash_fields(TABLE *table) { ulonglong crc= 0; Field **fields= table->visible_field_ptr(); for (uint i=0 ; i < table->visible_field_count() ; i++) unique_hash(fields[i], &crc); return crc; } /** Check unique_constraint. @details Calculates record's hash and checks whether the record given in table->record[0] is already present in the tmp table. @param table Temporary table to check @notes This function assumes record[0] is already filled by the caller. Depending on the presence of table->group, either the group-by fields or the full list of the table's visible fields is used to calculate the hash.
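Because different records may produce the same hash value, every index hit on hash_field is verified with a full field-by-field comparison (group_rec_cmp() or table_rec_cmp()) before the record is treated as an existing duplicate.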
@returns false same record was found true record wasn't found */ bool check_unique_constraint(TABLE *table) { ulonglong hash; if (!table->hash_field) return true; if (table->no_keyread) return true; if (table->group) hash= unique_hash_group(table->group); else hash= unique_hash_fields(table); table->hash_field->store(hash, true); int res= table->file->ha_index_read_map(table->record[1], table->hash_field->ptr, HA_WHOLE_KEY, HA_READ_KEY_EXACT); while (!res) { // Check whether records are the same. if (!(table->distinct ? table_rec_cmp(table) : group_rec_cmp(table->group, table->record[0], table->record[1]))) return false; // skip it res= table->file->ha_index_next_same(table->record[1], table->hash_field->ptr, sizeof(hash)); } return true; } /* ARGSUSED */ static enum_nested_loop_state end_write(JOIN *join, QEP_TAB *const qep_tab, bool end_of_records) { TABLE *const table= qep_tab->table(); DBUG_ENTER("end_write"); if (join->thd->killed) // Aborted by user { join->thd->send_kill_message(); DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */ } if (!end_of_records) { Temp_table_param *const tmp_tbl= qep_tab->tmp_table_param; if (copy_fields(tmp_tbl, join->thd)) DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ if (copy_funcs(tmp_tbl->items_to_copy, join->thd)) DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ if (!qep_tab->having || qep_tab->having->val_int()) { int error; join->found_records++; if (!check_unique_constraint(table)) goto end; // skip it if ((error=table->file->ha_write_row(table->record[0]))) { if (table->file->is_ignorable_error(error)) goto end; if (create_ondisk_from_heap(join->thd, table, tmp_tbl->start_recinfo, &tmp_tbl->recinfo, error, TRUE, NULL)) DBUG_RETURN(NESTED_LOOP_ERROR); // Not a table_is_full error table->s->uniques=0; // To ensure rows are the same } if (++qep_tab->send_records >= tmp_tbl->end_write_records && join->do_send_rows) { if (!join->calc_found_rows) DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT); join->do_send_rows=0; join->unit->select_limit_cnt = HA_POS_ERROR; DBUG_RETURN(NESTED_LOOP_OK); } } } end: DBUG_RETURN(NESTED_LOOP_OK); } /* ARGSUSED */ /** Group by searching after group record and updating it if possible. */ static enum_nested_loop_state end_update(JOIN *join, QEP_TAB *const qep_tab, bool end_of_records) { TABLE *const table= qep_tab->table(); ORDER *group; int error; bool group_found= false; DBUG_ENTER("end_update"); if (end_of_records) DBUG_RETURN(NESTED_LOOP_OK); if (join->thd->killed) // Aborted by user { join->thd->send_kill_message(); DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */ } Temp_table_param *const tmp_tbl= qep_tab->tmp_table_param; join->found_records++; if (copy_fields(tmp_tbl, join->thd)) // Groups are copied twice. DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ /* Make a key of group index */ if (table->hash_field) { /* We need to call to copy_funcs here in order to get correct value for hash_field. However, this call isn't needed so early when hash_field isn't used as it would cause unnecessary additional evaluation of functions to be copied when 2nd and further records in group are found. 
*/ if (copy_funcs(tmp_tbl->items_to_copy, join->thd)) DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ if (!check_unique_constraint(table)) group_found= true; } else { for (group=table->group ; group ; group=group->next) { Item *item= *group->item; item->save_org_in_field(group->field); /* Store in the used key if the field was 0 */ if (item->maybe_null) group->buff[-1]= (char) group->field->is_null(); } const uchar *key= tmp_tbl->group_buff; if (!table->file->ha_index_read_map(table->record[1], key, HA_WHOLE_KEY, HA_READ_KEY_EXACT)) group_found= true; } if (group_found) { /* Update old record */ restore_record(table, record[1]); update_tmptable_sum_func(join->sum_funcs, table); if ((error=table->file->ha_update_row(table->record[1], table->record[0]))) { // Old and new records are the same, ok to ignore if (error == HA_ERR_RECORD_IS_THE_SAME) DBUG_RETURN(NESTED_LOOP_OK); table->file->print_error(error, MYF(0)); /* purecov: inspected */ DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ } DBUG_RETURN(NESTED_LOOP_OK); } /* Copy null bits from group key to table We can't copy all data as the key may have different format as the row data (for example as with VARCHAR keys) */ if (!table->hash_field) { KEY_PART_INFO *key_part; for (group= table->group, key_part= table->key_info[0].key_part; group; group= group->next, key_part++) { if (key_part->null_bit) memcpy(table->record[0] + key_part->offset, group->buff, 1); } /* See comment on copy_funcs above. */ if (copy_funcs(tmp_tbl->items_to_copy, join->thd)) DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ } init_tmptable_sum_functions(join->sum_funcs); if ((error=table->file->ha_write_row(table->record[0]))) { /* If the error is HA_ERR_FOUND_DUPP_KEY and the grouping involves a TIMESTAMP field, throw a meaningfull error to user with the actual reason and the workaround. I.e, "Grouping on temporal is non-deterministic for timezones having DST. Please consider switching to UTC for this query". This is a temporary measure until we implement WL#13148 (Do all internal handling TIMESTAMP in UTC timezone), which will make such problem impossible. 
*/ if (error == HA_ERR_FOUND_DUPP_KEY) { for (group=table->group ; group ; group=group->next) { if (group->field->type() == MYSQL_TYPE_TIMESTAMP) { my_error(ER_GROUPING_ON_TIMESTAMP_IN_DST, MYF(0)); DBUG_RETURN(NESTED_LOOP_ERROR); } } } if (create_ondisk_from_heap(join->thd, table, tmp_tbl->start_recinfo, &tmp_tbl->recinfo, error, FALSE, NULL)) DBUG_RETURN(NESTED_LOOP_ERROR); // Not a table_is_full error /* Change method to update rows */ if ((error= table->file->ha_index_init(0, 0))) { table->file->print_error(error, MYF(0)); DBUG_RETURN(NESTED_LOOP_ERROR); } } qep_tab->send_records++; DBUG_RETURN(NESTED_LOOP_OK); } /* ARGSUSED */ enum_nested_loop_state end_write_group(JOIN *join, QEP_TAB *const qep_tab, bool end_of_records) { TABLE *table= qep_tab->table(); int idx= -1; DBUG_ENTER("end_write_group"); if (join->thd->killed) { // Aborted by user join->thd->send_kill_message(); DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */ } if (!join->first_record || end_of_records || (idx=test_if_item_cache_changed(join->group_fields)) >= 0) { Temp_table_param *const tmp_tbl= qep_tab->tmp_table_param; if (join->first_record || (end_of_records && !join->grouped)) { int send_group_parts= join->send_group_parts; if (idx < send_group_parts) { table_map save_nullinfo= 0; if (!join->first_record) { // Dead code or we need a test case for this branch assert(false); /* If this is a subquery, we need to save and later restore the const table NULL info before clearing the tables because the following executions of the subquery do not reevaluate constant fields. @see save_const_null_info and restore_const_null_info */ if (join->select_lex->master_unit()->item && join->const_tables) save_const_null_info(join, &save_nullinfo); // Calculate aggregate functions for no rows List_iterator_fast it(*(qep_tab-1)->fields); Item *item; while ((item= it++)) item->no_rows_in_result(); // Mark tables as containing only NULL values if (join->clear()) DBUG_RETURN(NESTED_LOOP_ERROR); } copy_sum_funcs(join->sum_funcs, join->sum_funcs_end[send_group_parts]); if (!qep_tab->having || qep_tab->having->val_int()) { int error= table->file->ha_write_row(table->record[0]); if (error && create_ondisk_from_heap(join->thd, table, tmp_tbl->start_recinfo, &tmp_tbl->recinfo, error, FALSE, NULL)) DBUG_RETURN(NESTED_LOOP_ERROR); } if (join->rollup.state != ROLLUP::STATE_NONE) { if (join->rollup_write_data((uint) (idx+1), table)) DBUG_RETURN(NESTED_LOOP_ERROR); } if (save_nullinfo) restore_const_null_info(join, save_nullinfo); if (end_of_records) DBUG_RETURN(NESTED_LOOP_OK); } } else { if (end_of_records) DBUG_RETURN(NESTED_LOOP_OK); join->first_record=1; (void)(test_if_item_cache_changed(join->group_fields)); } if (idx < (int) join->send_group_parts) { if (copy_fields(tmp_tbl, join->thd)) DBUG_RETURN(NESTED_LOOP_ERROR); if (copy_funcs(tmp_tbl->items_to_copy, join->thd)) DBUG_RETURN(NESTED_LOOP_ERROR); if (init_sum_functions(join->sum_funcs, join->sum_funcs_end[idx+1])) DBUG_RETURN(NESTED_LOOP_ERROR); DBUG_RETURN(NESTED_LOOP_OK); } } if (update_sum_func(join->sum_funcs)) DBUG_RETURN(NESTED_LOOP_ERROR); DBUG_RETURN(NESTED_LOOP_OK); } /* If not selecting by given key, create an index how records should be read SYNOPSIS create_sort_index() thd Thread handler join Join with table to sort order How table should be sorted filesort_limit Max number of rows that needs to be sorted select_limit Max number of rows in final output Used to decide if we should use index or not IMPLEMENTATION - If there is an index that can be used, the first non-const 
join_tab in 'join' is modified to use this index. - If no index, create with filesort() an index file that can be used to retrieve rows in order (should be done with 'read_record'). The sorted data is stored in tab->table() and will be freed when calling free_io_cache(tab->table()). RETURN VALUES 0 ok -1 Some fatal error 1 No records */ static int create_sort_index(THD *thd, JOIN *join, QEP_TAB *tab) { ha_rows examined_rows, found_rows, returned_rows; TABLE *table; bool status; Filesort *fsort= tab->filesort; DBUG_ENTER("create_sort_index"); // One row, no need to sort. make_tmp_tables_info should already handle this. assert(!join->plan_is_const() && fsort); table= tab->table(); table->sort.io_cache=(IO_CACHE*) my_malloc(key_memory_TABLE_sort_io_cache, sizeof(IO_CACHE), MYF(MY_WME | MY_ZEROFILL)); table->status=0; // May be wrong if quick_select // If table has a range, move it to select if (tab->quick() && tab->ref().key >= 0) { if (tab->type() != JT_REF_OR_NULL && tab->type() != JT_FT) { assert(tab->type() == JT_REF || tab->type() == JT_EQ_REF); // Update ref value if ((cp_buffer_from_ref(thd, table, &tab->ref()) && thd->is_fatal_error)) goto err; // out of memory } } /* Fill schema tables with data before filesort if it's necessary */ if ((join->select_lex->active_options() & OPTION_SCHEMA_TABLE) && get_schema_tables_result(join, PROCESSED_BY_CREATE_SORT_INDEX)) goto err; if (table->s->tmp_table) table->file->info(HA_STATUS_VARIABLE); // Get record count status= filesort(thd, fsort, tab->keep_current_rowid, &examined_rows, &found_rows, &returned_rows); table->sort.found_records= returned_rows; tab->set_records(found_rows); // For SQL_CALC_ROWS tab->join()->examined_rows+=examined_rows; table->set_keyread(FALSE); // Restore if we used indexes if (tab->type() == JT_FT) table->file->ft_end(); else table->file->ha_index_or_rnd_end(); DBUG_RETURN(status); err: DBUG_RETURN(-1); } /***************************************************************************** Remove duplicates from tmp table This should be recoded to add a unique index to the table and remove duplicates Table is a locked single thread table fields is the number of fields to check (from the end) *****************************************************************************/ static bool compare_record(TABLE *table, Field **ptr) { for (; *ptr ; ptr++) { if ((*ptr)->cmp_offset(table->s->rec_buff_length)) return 1; } return 0; } static bool copy_blobs(Field **ptr) { for (; *ptr ; ptr++) { if ((*ptr)->flags & BLOB_FLAG) if (((Field_blob *) (*ptr))->copy()) return 1; // Error } return 0; } static void free_blobs(Field **ptr) { for (; *ptr ; ptr++) { if ((*ptr)->flags & BLOB_FLAG) ((Field_blob *) (*ptr))->mem_free(); } } bool QEP_TAB::remove_duplicates() { bool error; ulong reclength,offset; uint field_count; List *field_list= (this-1)->fields; DBUG_ENTER("remove_duplicates"); assert(join()->tmp_tables > 0 && table()->s->tmp_table != NO_TMP_TABLE); THD_STAGE_INFO(join()->thd, stage_removing_duplicates); TABLE *const tbl= table(); tbl->reginfo.lock_type=TL_WRITE; /* Calculate how many saved fields there is in list */ field_count=0; List_iterator it(*field_list); Item *item; while ((item=it++)) { if (item->get_tmp_table_field() && ! item->const_item()) field_count++; } if (!field_count && !join()->calc_found_rows && !having) { // only const items with no OPTION_FOUND_ROWS join()->unit->select_limit_cnt= 1; // Only send first row DBUG_RETURN(false); } Field **first_field= tbl->field+ tbl->s->fields - field_count; offset= (field_count ? 
tbl->field[tbl->s->fields - field_count]-> offset(tbl->record[0]) : 0); reclength= tbl->s->reclength-offset; free_io_cache(tbl); // Safety tbl->file->info(HA_STATUS_VARIABLE); if (tbl->s->db_type() == heap_hton || (!tbl->s->blob_fields && ((ALIGN_SIZE(reclength) + HASH_OVERHEAD) * tbl->file->stats.records < join()->thd->variables.sortbuff_size))) error=remove_dup_with_hash_index(join()->thd, tbl, field_count, first_field, reclength, having); else error=remove_dup_with_compare(join()->thd, tbl, first_field, offset, having); free_blobs(first_field); DBUG_RETURN(error); } static bool remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field, ulong offset, Item *having) { handler *file=table->file; char *org_record,*new_record; uchar *record; int error; ulong reclength= table->s->reclength-offset; DBUG_ENTER("remove_dup_with_compare"); org_record=(char*) (record=table->record[0])+offset; new_record=(char*) table->record[1]+offset; if ((error= file->ha_rnd_init(1))) goto err; error=file->ha_rnd_next(record); for (;;) { if (thd->killed) { thd->send_kill_message(); error=0; goto err; } if (error) { if (error == HA_ERR_RECORD_DELETED) { error= file->ha_rnd_next(record); continue; } if (error == HA_ERR_END_OF_FILE) break; goto err; } if (having && !having->val_int()) { if ((error=file->ha_delete_row(record))) goto err; error=file->ha_rnd_next(record); continue; } if (copy_blobs(first_field)) { error=0; goto err; } memcpy(new_record,org_record,reclength); /* Read through rest of file and mark duplicated rows deleted */ bool found=0; for (;;) { if ((error=file->ha_rnd_next(record))) { if (error == HA_ERR_RECORD_DELETED) continue; if (error == HA_ERR_END_OF_FILE) break; goto err; } if (compare_record(table, first_field) == 0) { if ((error=file->ha_delete_row(record))) goto err; } else if (!found) { found=1; file->position(record); // Remember position } } if (!found) break; // End of file /* Restart search on next row */ error=file->ha_rnd_pos(record, file->ref); } file->extra(HA_EXTRA_NO_CACHE); DBUG_RETURN(false); err: file->extra(HA_EXTRA_NO_CACHE); if (file->inited) (void) file->ha_rnd_end(); if (error) file->print_error(error,MYF(0)); DBUG_RETURN(true); } /** Generate a hash index for each row to quickly find duplicate rows. @note Note that this will not work on tables with blobs! 
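As implemented below, each row's checked fields are rendered into their sort-key form with make_sort_key() and the concatenated keys are inserted into an in-memory HASH; a row whose key is already present in the hash is deleted as a duplicate.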
*/ static bool remove_dup_with_hash_index(THD *thd, TABLE *table, uint field_count, Field **first_field, ulong key_length, Item *having) { uchar *key_buffer, *key_pos, *record=table->record[0]; int error; handler *file= table->file; ulong extra_length= ALIGN_SIZE(key_length)-key_length; uint *field_lengths,*field_length; HASH hash; DBUG_ENTER("remove_dup_with_hash_index"); if (!my_multi_malloc(key_memory_hash_index_key_buffer, MYF(MY_WME), &key_buffer, (uint) ((key_length + extra_length) * (long) file->stats.records), &field_lengths, (uint) (field_count*sizeof(*field_lengths)), NullS)) DBUG_RETURN(true); { Field **ptr; ulong total_length= 0; for (ptr= first_field, field_length=field_lengths ; *ptr ; ptr++) { uint length= (*ptr)->sort_length(); (*field_length++)= length; total_length+= length; } DBUG_PRINT("info",("field_count: %u key_length: %lu total_length: %lu", field_count, key_length, total_length)); assert(total_length <= key_length); key_length= total_length; extra_length= ALIGN_SIZE(key_length)-key_length; } if (my_hash_init(&hash, &my_charset_bin, (uint) file->stats.records, 0, key_length, (my_hash_get_key) 0, 0, 0, key_memory_hash_index_key_buffer)) { my_free(key_buffer); DBUG_RETURN(true); } if ((error= file->ha_rnd_init(1))) goto err; key_pos=key_buffer; for (;;) { uchar *org_key_pos; if (thd->killed) { thd->send_kill_message(); error=0; goto err; } if ((error=file->ha_rnd_next(record))) { if (error == HA_ERR_RECORD_DELETED) continue; if (error == HA_ERR_END_OF_FILE) break; goto err; } if (having && !having->val_int()) { if ((error=file->ha_delete_row(record))) goto err; continue; } /* copy fields to key buffer */ org_key_pos= key_pos; field_length=field_lengths; for (Field **ptr= first_field ; *ptr ; ptr++) { (*ptr)->make_sort_key(key_pos,*field_length); key_pos+= *field_length++; } /* Check if it exists before */ if (my_hash_search(&hash, org_key_pos, key_length)) { /* Duplicated found ; Remove the row */ if ((error=file->ha_delete_row(record))) goto err; } else { if (my_hash_insert(&hash, org_key_pos)) goto err; } key_pos+=extra_length; } my_free(key_buffer); my_hash_free(&hash); file->extra(HA_EXTRA_NO_CACHE); (void) file->ha_rnd_end(); DBUG_RETURN(false); err: my_free(key_buffer); my_hash_free(&hash); file->extra(HA_EXTRA_NO_CACHE); if (file->inited) (void) file->ha_rnd_end(); if (error) file->print_error(error,MYF(0)); DBUG_RETURN(true); } /* eq_ref: Create the lookup key and check if it is the same as saved key SYNOPSIS cmp_buffer_with_ref() tab Join tab of the accessed table table The table to read. This is usually tab->table(), except for semi-join when we might need to make a lookup in a temptable instead. tab_ref The structure with methods to collect index lookup tuple. This is usually table->ref, except for the case of when we're doing lookup into semi-join materialization table. DESCRIPTION Used by eq_ref access method: create the index lookup key and check if we've used this key at previous lookup (If yes, we don't need to repeat the lookup - the record has been already fetched) RETURN TRUE No cached record for the key, or failed to create the key (due to out-of-domain error) FALSE The created key is the same as the previous one (and the record is already in table->record) */ static bool cmp_buffer_with_ref(THD *thd, TABLE *table, TABLE_REF *tab_ref) { bool no_prev_key; if (!tab_ref->disable_cache) { if (!(no_prev_key= tab_ref->key_err)) { /* Previous access found a row. 
Copy its key */ memcpy(tab_ref->key_buff2, tab_ref->key_buff, tab_ref->key_length); } } else no_prev_key= TRUE; if ((tab_ref->key_err= cp_buffer_from_ref(thd, table, tab_ref)) || no_prev_key) return 1; return memcmp(tab_ref->key_buff2, tab_ref->key_buff, tab_ref->key_length) != 0; } bool cp_buffer_from_ref(THD *thd, TABLE *table, TABLE_REF *ref) { enum enum_check_fields save_count_cuted_fields= thd->count_cuted_fields; thd->count_cuted_fields= CHECK_FIELD_IGNORE; my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->write_set); bool result= 0; for (uint part_no= 0; part_no < ref->key_parts; part_no++) { store_key *s_key= ref->key_copy[part_no]; if (!s_key) continue; if (s_key->copy() & 1) { result= 1; break; } } thd->count_cuted_fields= save_count_cuted_fields; dbug_tmp_restore_column_map(table->write_set, old_map); return result; } /** Allocate group fields or take prepared (cached) ones. @param main_join join of current select @param curr_join current join (join of current select or temporary copy of it) @retval 0 ok @retval 1 failed */ bool make_group_fields(JOIN *main_join, JOIN *curr_join) { if (main_join->group_fields_cache.elements) { curr_join->group_fields= main_join->group_fields_cache; curr_join->sort_and_group= 1; } else { if (alloc_group_fields(curr_join, curr_join->group_list)) return (1); main_join->group_fields_cache= curr_join->group_fields; } return (0); } /** Get a list of buffers for saving the last group. Groups are saved in reverse order for an easier check loop. */ bool alloc_group_fields(JOIN *join, ORDER *group) { if (group) { for (; group ; group=group->next) { Cached_item *tmp=new_Cached_item(join->thd, *group->item, FALSE); if (!tmp || join->group_fields.push_front(tmp)) return TRUE; } } join->sort_and_group=1; /* Mark for do_select */ return FALSE; } /* Test if a single-row cache of items changed, and update the cache. @details Test if a list of items that typically represents a result row has changed. If the value of some item changed, update the cached value for this item. @param list list of <item, cached value> pairs stored as Cached_item. @return -1 if no item changed @return index of the first item that changed */ int test_if_item_cache_changed(List<Cached_item> &list) { DBUG_ENTER("test_if_item_cache_changed"); List_iterator<Cached_item> li(list); int idx= -1,i; Cached_item *buff; for (i=(int) list.elements-1 ; (buff=li++) ; i--) { if (buff->cmp()) idx=i; } DBUG_PRINT("info", ("idx: %d", idx)); DBUG_RETURN(idx); } /** Setup copy_fields to save fields at start of new group. Only FIELD_ITEM:s and FUNC_ITEM:s need to be saved between groups. Change old item_field to use a new field that points at the saved field value. This function is only called before use of send_result_set_metadata. @param thd THD pointer @param param temporary table parameters @param ref_pointer_array array of pointers to top elements of field list @param res_selected_fields new list of items of select item list @param res_all_fields new list of all items @param elements number of elements in select item list @param all_fields all fields list @todo In most cases this result will be sent to the user.
This should be changed to use copy_int or copy_real depending on how the value is to be used: In some cases this may be an argument in a group function, like: IF(ISNULL(col),0,COUNT(*)) @retval 0 ok @retval !=0 error */ bool setup_copy_fields(THD *thd, Temp_table_param *param, Ref_ptr_array ref_pointer_array, List &res_selected_fields, List &res_all_fields, uint elements, List &all_fields) { Item *pos; List_iterator_fast li(all_fields); Copy_field *copy= NULL; Copy_field *copy_start MY_ATTRIBUTE((unused)); res_selected_fields.empty(); res_all_fields.empty(); List_iterator_fast itr(res_all_fields); List extra_funcs; uint i, border= all_fields.elements - elements; DBUG_ENTER("setup_copy_fields"); if (param->field_count && !(copy=param->copy_field= new Copy_field[param->field_count])) goto err2; param->copy_funcs.empty(); copy_start= copy; for (i= 0; (pos= li++); i++) { Field *field; uchar *tmp; Item *real_pos= pos->real_item(); /* Aggregate functions can be substituted for fields (by e.g. temp tables). We need to filter those substituted fields out. */ if (real_pos->type() == Item::FIELD_ITEM && !(real_pos != pos && ((Item_ref *)pos)->ref_type() == Item_ref::AGGREGATE_REF)) { Item_field *item; if (!(item= new Item_field(thd, ((Item_field*) real_pos)))) goto err; if (pos->type() == Item::REF_ITEM) { /* preserve the names of the ref when dereferncing */ Item_ref *ref= (Item_ref *) pos; item->db_name= ref->db_name; item->table_name= ref->table_name; item->item_name= ref->item_name; } pos= item; if (item->field->flags & BLOB_FLAG) { if (!(pos= Item_copy::create(pos))) goto err; /* Item_copy_string::copy for function can call Item_copy_string::val_int for blob via Item_ref. But if Item_copy_string::copy for blob isn't called before, it's value will be wrong so let's insert Item_copy_string for blobs in the beginning of copy_funcs (to see full test case look at having.test, BUG #4358) */ if (param->copy_funcs.push_front(pos)) goto err; } else { /* set up save buffer and change result_field to point at saved value */ field= item->field; item->result_field=field->new_field(thd->mem_root,field->table, 1); /* We need to allocate one extra byte for null handling. */ if (!(tmp= static_cast(sql_alloc(field->pack_length() + 1)))) goto err; if (copy) { assert (param->field_count > (uint) (copy - copy_start)); copy->set(tmp, item->result_field); item->result_field->move_field(copy->to_ptr, copy->to_null_ptr, 1); copy++; } } } else if ((real_pos->type() == Item::FUNC_ITEM || real_pos->type() == Item::SUBSELECT_ITEM || real_pos->type() == Item::CACHE_ITEM || real_pos->type() == Item::COND_ITEM) && !real_pos->with_sum_func) { // Save for send fields pos= real_pos; /* TODO: In most cases this result will be sent to the user. This should be changed to use copy_int or copy_real depending on how the value is to be used: In some cases this may be an argument in a group function, like: IF(ISNULL(col),0,COUNT(*)) */ if (!(pos= Item_copy::create(pos))) goto err; if (i < border) // HAVING, ORDER and GROUP BY { if (extra_funcs.push_back(pos)) goto err; } else if (param->copy_funcs.push_back(pos)) goto err; } res_all_fields.push_back(pos); ref_pointer_array[((i < border)? 
all_fields.elements-i-1 : i-border)]= pos; } param->copy_field_end= copy; for (i= 0; i < border; i++) itr++; itr.sublist(res_selected_fields, elements); /* Put elements from HAVING, ORDER BY and GROUP BY last to ensure that any reference used in these will resolve to an item that is already calculated */ param->copy_funcs.concat(&extra_funcs); DBUG_RETURN(0); err: if (copy) delete [] param->copy_field; // This is never 0 param->copy_field=0; err2: DBUG_RETURN(TRUE); } /** Make a copy of all simple SELECT'ed items. This is done at the start of a new group so that we can retrieve these later when the group changes. @returns false if OK, true on error. */ bool copy_fields(Temp_table_param *param, const THD *thd) { Copy_field *ptr=param->copy_field; Copy_field *end=param->copy_field_end; assert((ptr != NULL && end >= ptr) || (ptr == NULL && end == NULL)); for (; ptr < end; ptr++) ptr->invoke_do_copy(ptr); List_iterator_fast<Item> it(param->copy_funcs); Item_copy *item; bool is_error= thd->is_error(); while (!is_error && (item= (Item_copy*) it++)) is_error= item->copy(thd); return is_error; } /** Change all funcs and sum_funcs to fields in tmp table, and create new list of all items. @param thd THD pointer @param ref_pointer_array array of pointers to top elements of field list @param res_selected_fields new list of items of select item list @param res_all_fields new list of all items @param elements number of elements in select item list @param all_fields all fields list @retval 0 ok @retval !=0 error */ bool change_to_use_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array, List<Item> &res_selected_fields, List<Item> &res_all_fields, uint elements, List<Item> &all_fields) { List_iterator_fast<Item> it(all_fields); Item *item_field,*item; DBUG_ENTER("change_to_use_tmp_fields"); res_selected_fields.empty(); res_all_fields.empty(); uint border= all_fields.elements - elements; for (uint i= 0; (item= it++); i++) { Field *field; if (item->with_sum_func && item->type() != Item::SUM_FUNC_ITEM) item_field= item; else if (item->type() == Item::FIELD_ITEM) item_field= item->get_tmp_table_item(thd); else if (item->type() == Item::FUNC_ITEM && ((Item_func*)item)->functype() == Item_func::SUSERVAR_FUNC) { field= item->get_tmp_table_field(); if (field != NULL) { /* Replace "@:=<expression>" with "@:=<tmp table column>". Otherwise, we would re-evaluate <expression>, and if expression were a subquery, this would access already-unlocked tables.
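For example (illustrative only), in SELECT @v:=(SELECT MAX(b) FROM t2) FROM t1 GROUP BY a, re-evaluating the subquery while producing the result rows could read t2 after it has been unlocked; instead, the value already stored in the tmp table column is assigned to @v.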
*/ Item_func_set_user_var* suv= new Item_func_set_user_var(thd, (Item_func_set_user_var*) item); Item_field *new_field= new Item_field(field); if (!suv || !new_field) DBUG_RETURN(true); // Fatal error List list; list.push_back(new_field); suv->set_arguments(list, true); item_field= suv; } else item_field= item; } else if ((field= item->get_tmp_table_field())) { if (item->type() == Item::SUM_FUNC_ITEM && field->table->group) item_field= ((Item_sum*) item)->result_item(field); else item_field= (Item*) new Item_field(field); if (!item_field) DBUG_RETURN(true); // Fatal error if (item->real_item()->type() != Item::FIELD_ITEM) field->orig_table= 0; item_field->item_name= item->item_name; if (item->type() == Item::REF_ITEM) { Item_field *ifield= (Item_field *) item_field; Item_ref *iref= (Item_ref *) item; ifield->table_name= iref->table_name; ifield->db_name= iref->db_name; } #ifndef NDEBUG if (!item_field->item_name.is_set()) { char buff[256]; String str(buff,sizeof(buff),&my_charset_bin); str.length(0); item->print(&str, QT_ORDINARY); item_field->item_name.copy(str.ptr(), str.length()); } #endif } else item_field= item; res_all_fields.push_back(item_field); ref_pointer_array[((i < border)? all_fields.elements-i-1 : i-border)]= item_field; } List_iterator_fast itr(res_all_fields); for (uint i= 0; i < border; i++) itr++; itr.sublist(res_selected_fields, elements); DBUG_RETURN(false); } /** Change all sum_func refs to fields to point at fields in tmp table. Change all funcs to be fields in tmp table. @param thd THD pointer @param ref_pointer_array array of pointers to top elements of filed list @param res_selected_fields new list of items of select item list @param res_all_fields new list of all items @param elements number of elements in select item list @param all_fields all fields list @retval 0 ok @retval 1 error */ bool change_refs_to_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array, List &res_selected_fields, List &res_all_fields, uint elements, List &all_fields) { List_iterator_fast it(all_fields); Item *item, *new_item; res_selected_fields.empty(); res_all_fields.empty(); uint i, border= all_fields.elements - elements; for (i= 0; (item= it++); i++) { res_all_fields.push_back(new_item= item->get_tmp_table_item(thd)); ref_pointer_array[((i < border)? all_fields.elements-i-1 : i-border)]= new_item; } List_iterator_fast itr(res_all_fields); for (i= 0; i < border; i++) itr++; itr.sublist(res_selected_fields, elements); return thd->is_fatal_error; } /** Save NULL-row info for constant tables. Used in conjunction with restore_const_null_info() to restore constant table null_row and status values after temporarily marking rows as NULL. This is only done for const tables in subqueries because these values are not recalculated on next execution of the subquery. @param join The join for which const tables are about to be marked as containing only NULL values @param[out] save_nullinfo Const tables that have null_row=false and STATUS_NULL_ROW set are tagged in this table_map so that the value can be restored by restore_const_null_info() @see TABLE::set_null_row @see restore_const_null_info */ static void save_const_null_info(JOIN *join, table_map *save_nullinfo) { assert(join->const_tables); for (uint tableno= 0; tableno < join->const_tables; tableno++) { QEP_TAB *const tab= join->qep_tab + tableno; TABLE *const table= tab->table(); /* table->status and table->null_row must be in sync: either both set or none set. 
      Otherwise, an additional table_map parameter would be needed so that
      save_const_null_info()/restore_const_null_info() could save and
      restore them separately.
    */
    assert(table->has_null_row() ? (table->status & STATUS_NULL_ROW) :
                                   !(table->status & STATUS_NULL_ROW));

    if (!table->has_null_row())
      *save_nullinfo|= tab->table_ref->map();
  }
}

/**
  Restore NULL-row info for constant tables. Used in conjunction with
  save_const_null_info() to restore constant table null_row and status
  values after temporarily marking rows as NULL. This is only done for
  const tables in subqueries because these values are not recalculated
  on next execution of the subquery.

  @param join            The join for which const tables have been marked
                         as containing only NULL values
  @param save_nullinfo   Const tables that had null_row=false (and thus
                         STATUS_NULL_ROW not set) when
                         save_const_null_info() was called

  @see TABLE::set_null_row
  @see save_const_null_info
*/
static void restore_const_null_info(JOIN *join, table_map save_nullinfo)
{
  assert(join->const_tables && save_nullinfo);

  for (uint tableno= 0; tableno < join->const_tables; tableno++)
  {
    QEP_TAB *const tab= join->qep_tab + tableno;
    if ((save_nullinfo & tab->table_ref->map()))
    {
      /*
        The table had null_row=false when save_const_null_info() was
        called, so clear the NULL-row mark that was set in the meantime.
      */
      tab->table()->reset_null_row();
    }
  }
}


/****************************************************************************
  QEP_tmp_table implementation
****************************************************************************/

/**
  @brief Instantiate tmp table and start index scan if necessary
  @todo A tmp table is always created, even for an empty result. Extend
        the executor to avoid tmp table creation when no rows are written
        into the tmp table.
  @return
    true  error
    false ok
*/
bool
QEP_tmp_table::prepare_tmp_table()
{
  TABLE *table= qep_tab->table();
  JOIN *join= qep_tab->join();
  int rc= 0;

  Temp_table_param *const tmp_tbl= qep_tab->tmp_table_param;
  if (!table->is_created())
  {
    if (instantiate_tmp_table(table, tmp_tbl->keyinfo,
                              tmp_tbl->start_recinfo,
                              &tmp_tbl->recinfo,
                              join->select_lex->active_options(),
                              join->thd->variables.big_tables,
                              &join->thd->opt_trace))
      return true;
    (void) table->file->extra(HA_EXTRA_WRITE_CACHE);
    empty_record(table);
  }
  /* If it wasn't already, start index scan for grouping using table index. */
  if (!table->file->inited &&
      ((table->group &&
        tmp_tbl->sum_func_count && table->s->keys) ||
       table->hash_field))
    rc= table->file->ha_index_init(0, 0);
  else
  {
    /* Start a table (rnd) scan in scanning mode */
    rc= table->file->ha_rnd_init(true);
  }
  if (rc)
  {
    table->file->print_error(rc, MYF(0));
    return true;
  }
  return false;
}


/**
  @brief Prepare table if necessary and call write_func to save record

  @param end_of_records  the end_of_records signal to pass to the writer

  @return one of enum_nested_loop_state
*/
enum_nested_loop_state
QEP_tmp_table::put_record(bool end_of_records)
{
  // Lazy tmp table creation/initialization
  if (!qep_tab->table()->file->inited && prepare_tmp_table())
    return NESTED_LOOP_ERROR;
  enum_nested_loop_state rc= (*write_func)(qep_tab->join(), qep_tab,
                                           end_of_records);
  return rc;
}


/**
  @brief Finish rnd/index scan after accumulating records, switch ref_array,
         and send accumulated records further.

  @return one of enum_nested_loop_state
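
  @note The function first signals end-of-records to the writer, drops the
        tmp table's write cache and ends any active index/rnd scan on it,
        switches the JOIN's item ref array to this table's items, and then
        reads the accumulated rows back, passing each one to
        evaluate_join_record() until EOF, an error, or the query is killed.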
*/
enum_nested_loop_state
QEP_tmp_table::end_send()
{
  enum_nested_loop_state rc= NESTED_LOOP_OK;
  TABLE *table= qep_tab->table();
  JOIN *join= qep_tab->join();

  // All records were stored, send them further
  int tmp, new_errno= 0;

  if ((rc= put_record(true)) < NESTED_LOOP_OK)
    return rc;

  if ((tmp= table->file->extra(HA_EXTRA_NO_CACHE)))
  {
    DBUG_PRINT("error",("extra(HA_EXTRA_NO_CACHE) failed"));
    new_errno= tmp;
  }
  if ((tmp= table->file->ha_index_or_rnd_end()))
  {
    DBUG_PRINT("error",("ha_index_or_rnd_end() failed"));
    new_errno= tmp;
  }
  if (new_errno)
  {
    table->file->print_error(new_errno, MYF(0));
    return NESTED_LOOP_ERROR;
  }

  // Update ref array
  join->set_items_ref_array(*qep_tab->ref_array);
  table->reginfo.lock_type= TL_UNLOCK;

  bool in_first_read= true;
  while (rc == NESTED_LOOP_OK)
  {
    int error;
    if (in_first_read)
    {
      in_first_read= false;
      error= join_init_read_record(qep_tab);
    }
    else
      error= qep_tab->read_record.read_record(&qep_tab->read_record);

    if (error > 0 || (join->thd->is_error()))   // Fatal error
      rc= NESTED_LOOP_ERROR;
    else if (error < 0)
      break;
    else if (join->thd->killed)                 // Aborted by user
    {
      join->thd->send_kill_message();
      rc= NESTED_LOOP_KILLED;
    }
    else
      rc= evaluate_join_record(join, qep_tab);
  }

  // Finish rnd scan after sending records
  if (table->file->inited)
    table->file->ha_rnd_end();

  return rc;
}


/******************************************************************************
  Code for pfs_batch_update
******************************************************************************/

bool QEP_TAB::pfs_batch_update(JOIN *join)
{
  /*
    Use PFS batch mode unless
     1. tab is not an inner-most table, or
     2. a table has eq_ref or const access type, or
     3. this tab contains a subquery that accesses one or more tables
  */
  return !((join->qep_tab + join->primary_tables - 1) != this ||  // 1
           this->type() == JT_EQ_REF ||                           // 2
           this->type() == JT_CONST ||
           this->type() == JT_SYSTEM ||
           (condition() && condition()->has_subquery()));         // 3
}

/**
  @} (end of group Query_Executor)
*/