1 /* Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23 /**
24 @file sql/sql_tmp_table.cc
25 Temporary tables implementation.
26 */
27
28 #include "sql/sql_tmp_table.h"
29
30 #include <fcntl.h>
31 #include <stddef.h>
32 #include <stdio.h>
33 #include <algorithm>
34 #include <cstring>
35 #include <new>
36 #include <utility>
37 #include <vector>
38
39 #include "field_types.h"
40 #include "lex_string.h"
41 #include "m_ctype.h"
42 #include "m_string.h"
43 #include "my_alloc.h"
44 #include "my_bitmap.h"
45 #include "my_compiler.h"
46 #include "my_dbug.h"
47 #include "my_pointer_arithmetic.h"
48 #include "my_sys.h"
49 #include "mysql/plugin.h"
50 #include "mysql/udf_registration_types.h"
51 #include "mysql_com.h"
52 #include "mysqld_error.h"
53 #include "scope_guard.h"
54 #include "sql/create_field.h"
55 #include "sql/current_thd.h"
56 #include "sql/dd/types/column.h"
57 #include "sql/debug_sync.h" // DEBUG_SYNC
58 #include "sql/field.h"
59 #include "sql/filesort.h" // filesort_free_buffers
60 #include "sql/handler.h"
61 #include "sql/item_func.h" // Item_func
62 #include "sql/item_sum.h" // Item_sum
63 #include "sql/key.h"
64 #include "sql/mem_root_allocator.h"
65 #include "sql/mem_root_array.h" // Mem_root_array
66 #include "sql/mysqld.h" // heap_hton
67 #include "sql/opt_range.h" // QUICK_SELECT_I
68 #include "sql/opt_trace.h" // Opt_trace_object
69 #include "sql/opt_trace_context.h" // Opt_trace_context
70 #include "sql/psi_memory_key.h"
71 #include "sql/query_options.h"
72 #include "sql/sql_base.h" // free_io_cache
73 #include "sql/sql_class.h" // THD
74 #include "sql/sql_const.h"
75 #include "sql/sql_executor.h" // SJ_TMP_TABLE
76 #include "sql/sql_lex.h"
77 #include "sql/sql_list.h"
78 #include "sql/sql_opt_exec_shared.h"
79 #include "sql/sql_plugin.h" // plugin_unlock
80 #include "sql/sql_plugin_ref.h"
81 #include "sql/sql_select.h"
82 #include "sql/system_variables.h"
83 #include "sql/table.h"
84 #include "sql/temp_table_param.h"
85 #include "sql/thd_raii.h"
86 #include "sql/thr_malloc.h"
87 #include "sql/window.h"
88 #include "template_utils.h"
89
90 using std::max;
91 using std::min;
92 static bool setup_tmp_table_handler(TABLE *table, ulonglong select_options,
93 bool force_disk_table, bool schema_table);
94 static bool alloc_record_buffers(TABLE *table);
95
96 /****************************************************************************
97 Create internal temporary table
98 ****************************************************************************/
99
100 /**
101 Create field for temporary table from given field.
102
103 @param thd Thread handler
104 @param org_field Field from which new field will be created
105 @param name New field name
106 @param table Temporary table
107 @param item If item != NULL then fill_record() will update
108 the record in the original table.
109 If item == NULL then fill_record() will update
110 the temporary table
111
112 @retval
113 NULL on error
114 @retval
115 new_created field
116 */
117
create_tmp_field_from_field(THD * thd,const Field * org_field,const char * name,TABLE * table,Item_field * item)118 Field *create_tmp_field_from_field(THD *thd, const Field *org_field,
119 const char *name, TABLE *table,
120 Item_field *item) {
121 Field *new_field = org_field->new_field(thd->mem_root, table);
122 if (new_field == nullptr) return nullptr;
123
124 new_field->init(table);
125 new_field->orig_table = org_field->table;
126 new_field->field_name = name;
127 if (org_field->is_flag_set(NO_DEFAULT_VALUE_FLAG))
128 new_field->set_flag(NO_DEFAULT_VALUE_FLAG);
129 if (org_field->is_nullable() || org_field->table->is_nullable() ||
130 (item && item->maybe_null))
131 new_field->clear_flag(NOT_NULL_FLAG); // Because of outer join
132 if (org_field->type() == FIELD_TYPE_DOUBLE)
133 down_cast<Field_double *>(new_field)->not_fixed = true;
134 /*
135 This field will belong to an internal temporary table, it cannot be
136 generated.
137 */
138 new_field->gcol_info = nullptr;
139 new_field->stored_in_db = true;
140 return new_field;
141 }
142
143 /**
144 Create field for temporary table using type of given item.
145
146 @param item Item to create a field for
147 @param table Temporary table
148
149 @retval
150 0 on error
151 @retval
152 new_created field
153 */
154
create_tmp_field_from_item(Item * item,TABLE * table)155 static Field *create_tmp_field_from_item(Item *item, TABLE *table) {
156 bool maybe_null = item->maybe_null;
157 Field *new_field = nullptr;
158
159 switch (item->result_type()) {
160 case REAL_RESULT:
161 new_field = new (*THR_MALLOC)
162 Field_double(item->max_length, maybe_null, item->item_name.ptr(),
163 item->decimals, false, true);
164 break;
165 case INT_RESULT:
166 /*
167 Select an integer type with the minimal fit precision.
168 MY_INT32_NUM_DECIMAL_DIGITS is sign inclusive, don't consider the sign.
169 Values with MY_INT32_NUM_DECIMAL_DIGITS digits may or may not fit into
170 Field_long : make them Field_longlong.
171 */
172 if (item->max_length >= (MY_INT32_NUM_DECIMAL_DIGITS - 1))
173 new_field = new (*THR_MALLOC)
174 Field_longlong(item->max_length, maybe_null, item->item_name.ptr(),
175 item->unsigned_flag);
176 else
177 new_field = new (*THR_MALLOC)
178 Field_long(item->max_length, maybe_null, item->item_name.ptr(),
179 item->unsigned_flag);
180 break;
181 case STRING_RESULT:
182 DBUG_ASSERT(item->collation.collation);
183
184 /*
185 DATE/TIME, GEOMETRY and JSON fields have STRING_RESULT result type.
186 To preserve type they needed to be handled separately.
187 */
188 if (item->is_temporal() || item->data_type() == MYSQL_TYPE_GEOMETRY ||
189 item->data_type() == MYSQL_TYPE_JSON) {
190 new_field = item->tmp_table_field_from_field_type(table, true);
191 } else {
192 new_field = item->make_string_field(table);
193 }
194 new_field->set_derivation(item->collation.derivation);
195 break;
196 case DECIMAL_RESULT:
197 new_field = Field_new_decimal::create_from_item(item);
198 break;
199 case ROW_RESULT:
200 default:
201 // This case should never be choosen
202 DBUG_ASSERT(0);
203 new_field = nullptr;
204 break;
205 }
206 if (new_field == nullptr) return nullptr;
207
208 new_field->init(table);
209
210 if (item->type() == Item::NULL_ITEM)
211 new_field->is_created_from_null_item = true;
212 return new_field;
213 }
214
215 /**
216 Create field for information schema table.
217
218 @param table Temporary table
219 @param item Item to create a field for
220
221 @retval
222 0 on error
223 @retval
224 new_created field
225 */
226
create_tmp_field_for_schema(const Item * item,TABLE * table)227 static Field *create_tmp_field_for_schema(const Item *item, TABLE *table) {
228 if (item->data_type() == MYSQL_TYPE_VARCHAR) {
229 Field *field;
230 if (item->max_length > MAX_FIELD_VARCHARLENGTH)
231 field = new (*THR_MALLOC)
232 Field_blob(item->max_length, item->maybe_null, item->item_name.ptr(),
233 item->collation.collation, false);
234 else {
235 field = new (*THR_MALLOC) Field_varstring(
236 item->max_length, item->maybe_null, item->item_name.ptr(), table->s,
237 item->collation.collation);
238 table->s->db_create_options |= HA_OPTION_PACK_RECORD;
239 }
240 if (field) field->init(table);
241 return field;
242 }
243 return item->tmp_table_field_from_field_type(table, false);
244 }
245
246 /**
247 Create field for temporary table.
248
249 @param thd Thread handler
250 @param table Temporary table
251 @param item Item to create a field for
252 @param type Type of item (normally item->type)
253 @param copy_func If set and item is a function, store copy of item
254 in this array
255 @param from_field if field will be created using other field as example,
256 pointer example field will be written here
257 @param default_field If field has a default value field, store it here
258 @param group 1 if we are going to do a relative group by on result
259 @param modify_item 1 if item->result_field should point to new item.
260 This is relevent for how fill_record() is going to
261 work:
262 If modify_item is 1 then fill_record() will update
263 the record in the original table.
264 If modify_item is 0 then fill_record() will update
265 the temporary table
266 @param table_cant_handle_bit_fields if table can't handle bit-fields and
267 bit-fields shall be converted to long @see
268 Temp_table_param::bit_fields_as_long
269 @param make_copy_field if true, a pointer of the result field should be stored
270 in from_field, otherwise the item should be wrapped in Func_ptr and stored in
271 copy_func
272 @param copy_result_field true <=> save item's result_field in the from_field
273 arg, before changing it. This is used for a window's
274 OUT table when window uses frame buffer to copy a
275 function's result field from OUT table to frame buffer
276 (and back). @note that the goals of 'from_field' when
277 this argument is true and when it is false, are
278 different.
279
280 @retval NULL On error.
281
282 @retval new_created field
283 */
284
create_tmp_field(THD * thd,TABLE * table,Item * item,Item::Type type,Func_ptr_array * copy_func,Field ** from_field,Field ** default_field,bool group,bool modify_item,bool table_cant_handle_bit_fields,bool make_copy_field,bool copy_result_field)285 Field *create_tmp_field(THD *thd, TABLE *table, Item *item, Item::Type type,
286 Func_ptr_array *copy_func, Field **from_field,
287 Field **default_field, bool group, bool modify_item,
288 bool table_cant_handle_bit_fields, bool make_copy_field,
289 bool copy_result_field) {
290 DBUG_TRACE;
291 Field *result = nullptr;
292 Item::Type orig_type = type;
293 Item *orig_item = nullptr;
294
295 if (type != Item::FIELD_ITEM &&
296 item->real_item()->type() == Item::FIELD_ITEM) {
297 orig_item = item;
298 item = item->real_item();
299 type = Item::FIELD_ITEM;
300 }
301
302 bool is_wf =
303 type == Item::SUM_FUNC_ITEM && item->real_item()->m_is_window_function;
304
305 switch (type) {
306 case Item::FIELD_ITEM:
307 case Item::DEFAULT_VALUE_ITEM:
308 case Item::TRIGGER_FIELD_ITEM: {
309 Item_field *item_field = down_cast<Item_field *>(item);
310 /*
311 If item have to be able to store NULLs but underlaid field can't do it,
312 create_tmp_field_from_field() can't be used for tmp field creation.
313 */
314 if (item_field->maybe_null &&
315 !(item_field->field->is_nullable() ||
316 item_field->field->table->is_nullable())) {
317 result = create_tmp_field_from_item(item_field, table);
318 } else if (table_cant_handle_bit_fields &&
319 item_field->field->type() == MYSQL_TYPE_BIT) {
320 result = create_tmp_field_from_item(item_field, table);
321 /*
322 If the item is a function, a pointer to the item is stored in
323 copy_func. We separate fields from functions by checking if the
324 item is a result field item. The real_item() must be checked to
325 avoid falsely identifying Item_ref and its subclasses as functions
326 when they refer to field-like items, such as Item_copy and
327 subclasses. References to true fields have already been untangled
328 in the beginning of create_tmp_field().
329 */
330 if (item->real_item()->is_result_field())
331 copy_func->push_back(Func_ptr(item));
332 } else {
333 result = create_tmp_field_from_field(
334 thd, item_field->field,
335 orig_item ? orig_item->item_name.ptr()
336 : item_field->item_name.ptr(),
337 table,
338 (modify_item && orig_type != Item::REF_ITEM) ? item_field
339 : nullptr);
340 }
341 if (result == nullptr) return nullptr;
342 if (modify_item) {
343 if (orig_type == Item::REF_ITEM)
344 orig_item->set_result_field(result);
345 else
346 item_field->set_result_field(result);
347 }
348 /*
349 Fields that are used as arguments to the DEFAULT() function already have
350 their data pointers set to the default value during name resolution. See
351 Item_default_value::fix_fields.
352 */
353 if (orig_type != Item::DEFAULT_VALUE_ITEM &&
354 item_field->field->eq_def(result))
355 *default_field = item_field->field;
356 *from_field = item_field->field;
357 break;
358 }
359 /* Fall through */
360 case Item::FUNC_ITEM:
361 if (down_cast<Item_func *>(item)->functype() == Item_func::FUNC_SP) {
362 Item_func_sp *item_func_sp = down_cast<Item_func_sp *>(item);
363 Field *sp_result_field = item_func_sp->get_sp_result_field();
364
365 if (make_copy_field) {
366 DBUG_ASSERT(item_func_sp->get_result_field());
367 *from_field = item_func_sp->get_result_field();
368 } else {
369 copy_func->push_back(Func_ptr(item));
370 }
371
372 result = create_tmp_field_from_field(thd, sp_result_field,
373 item_func_sp->item_name.ptr(),
374 table, nullptr);
375 if (!result) break;
376 if (modify_item) item_func_sp->set_result_field(result);
377 break;
378 }
379
380 /* Fall through */
381 case Item::COND_ITEM:
382 case Item::FIELD_AVG_ITEM:
383 case Item::FIELD_BIT_ITEM:
384 case Item::FIELD_STD_ITEM:
385 case Item::FIELD_VARIANCE_ITEM:
386 case Item::SUBSELECT_ITEM:
387 /* The following can only happen with 'CREATE TABLE ... SELECT' */
388 case Item::PROC_ITEM:
389 case Item::INT_ITEM:
390 case Item::REAL_ITEM:
391 case Item::DECIMAL_ITEM:
392 case Item::STRING_ITEM:
393 case Item::REF_ITEM:
394 case Item::NULL_ITEM:
395 case Item::VARBIN_ITEM:
396 case Item::PARAM_ITEM:
397 case Item::SUM_FUNC_ITEM:
398 if (type == Item::SUM_FUNC_ITEM && !is_wf) {
399 Item_sum *item_sum = down_cast<Item_sum *>(item);
400 result = item_sum->create_tmp_field(group, table);
401 if (!result) my_error(ER_OUT_OF_RESOURCES, MYF(ME_FATALERROR));
402 } else {
403 /*
404 (2) we're windowing. The Item doesn't contain any not-yet-calculated
405 window function (per logic in our caller create_tmp_table()). So it
406 is an ordinary function or can be considered as such. We're creating
407 the OUT table using IN table as source, and we have previously
408 created a frame buffer (FB) using IN table as source. That previous
409 creation has set IN's item's result_field to be the FB field. Here
410 we save that FB field in from_field. Right after that,
411 create_tmp_field_from_item() sets IN's item's result_field to the
412 OUT field (which OUT field is the 'result' variable). We mark the
413 OUT field with FIELD_IS_MARKED. Later we detect the mark, and create
414 a Copy_field to from_field (FB) from the marked field (OUT). The end
415 situation is: IN's item's result_field is in OUT, enabling the
416 initial function evaluation and saving of its result in OUT; the
417 Copy_field from OUT to FB and back will allow buffering/restoration
418 of that result.
419 */
420 if (make_copy_field || (copy_result_field && !is_wf)) // (2)
421 {
422 *from_field = item->get_tmp_table_field();
423 DBUG_ASSERT(*from_field);
424 }
425
426 result = create_tmp_field_from_item(item, table);
427 if (result == nullptr) return nullptr;
428 if (modify_item) item->set_result_field(result);
429 if (copy_func && !make_copy_field &&
430 item->real_item()->is_result_field())
431 copy_func->push_back(Func_ptr(item));
432 if (copy_result_field) result->set_flag(FIELD_IS_MARKED);
433 }
434 break;
435 case Item::TYPE_HOLDER:
436 case Item::VALUES_COLUMN_ITEM:
437 result = down_cast<Item_aggregate_type *>(item)->make_field_by_type(
438 table, thd->is_strict_mode());
439 break;
440 default: // Doesn't have to be stored
441 DBUG_ASSERT(false);
442 break;
443 }
444 return result;
445 }
446
447 /*
448 Set up column usage bitmaps for a temporary table
449
450 IMPLEMENTATION
451 For temporary tables, we need one bitmap with all columns set and
452 a tmp_set bitmap to be used by things like filesort.
453 */
454
setup_tmp_table_column_bitmaps(TABLE * table,uchar * bitmaps)455 static void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps) {
456 uint field_count = table->s->fields;
457 bitmap_init(&table->def_read_set, (my_bitmap_map *)bitmaps, field_count);
458 bitmap_init(&table->tmp_set,
459 (my_bitmap_map *)(bitmaps + bitmap_buffer_size(field_count)),
460 field_count);
461 bitmap_init(&table->cond_set,
462 (my_bitmap_map *)(bitmaps + bitmap_buffer_size(field_count) * 2),
463 field_count);
464 /* write_set and all_set are copies of read_set */
465 table->def_write_set = table->def_read_set;
466 table->s->all_set = table->def_read_set;
467 bitmap_set_all(&table->s->all_set);
468 table->default_column_bitmaps();
469 table->s->column_bitmap_size = bitmap_buffer_size(field_count);
470 }
471
472 /**
473 Cache for the storage engine properties for the alternative temporary table
474 storage engines. This cache is initialized during startup of the server by
475 asking the storage engines for the values properties.
476 */
477
478 class Cache_temp_engine_properties {
479 public:
480 static uint HEAP_MAX_KEY_LENGTH;
481 static uint TEMPTABLE_MAX_KEY_LENGTH;
482 static uint INNODB_MAX_KEY_LENGTH;
483 static uint HEAP_MAX_KEY_PART_LENGTH;
484 static uint TEMPTABLE_MAX_KEY_PART_LENGTH;
485 static uint INNODB_MAX_KEY_PART_LENGTH;
486 static uint HEAP_MAX_KEY_PARTS;
487 static uint TEMPTABLE_MAX_KEY_PARTS;
488 static uint INNODB_MAX_KEY_PARTS;
489
490 static void init(THD *thd);
491 };
492
init(THD * thd)493 void Cache_temp_engine_properties::init(THD *thd) {
494 handler *handler;
495 plugin_ref db_plugin;
496
497 // Cache HEAP engine's
498 db_plugin = ha_lock_engine(nullptr, heap_hton);
499 handler =
500 get_new_handler((TABLE_SHARE *)nullptr, false, thd->mem_root, heap_hton);
501 HEAP_MAX_KEY_LENGTH = handler->max_key_length();
502 HEAP_MAX_KEY_PART_LENGTH = handler->max_key_part_length(nullptr);
503 HEAP_MAX_KEY_PARTS = handler->max_key_parts();
504 destroy(handler);
505 plugin_unlock(nullptr, db_plugin);
506 // Cache TempTable engine's
507 db_plugin = ha_lock_engine(nullptr, temptable_hton);
508 handler = get_new_handler((TABLE_SHARE *)nullptr, false, thd->mem_root,
509 temptable_hton);
510 TEMPTABLE_MAX_KEY_LENGTH = handler->max_key_length();
511 TEMPTABLE_MAX_KEY_PART_LENGTH = handler->max_key_part_length(nullptr);
512 TEMPTABLE_MAX_KEY_PARTS = handler->max_key_parts();
513 destroy(handler);
514 plugin_unlock(nullptr, db_plugin);
515 // Cache INNODB engine's
516 db_plugin = ha_lock_engine(nullptr, innodb_hton);
517 handler = get_new_handler((TABLE_SHARE *)nullptr, false, thd->mem_root,
518 innodb_hton);
519 INNODB_MAX_KEY_LENGTH = handler->max_key_length();
520 /*
521 For ha_innobase::max_supported_key_part_length(), the returned value
522 is constant. However, in innodb itself, the limitation
523 on key_part length is up to the ROW_FORMAT. In current trunk, internal
524 temp table's ROW_FORMAT is DYNAMIC. In order to keep the consistence
525 between server and innodb, here we hard-coded 3072 as the maximum of
526 key_part length supported by innodb until bug#20629014 is fixed.
527
528 TODO: Remove the hard-code here after bug#20629014 is fixed.
529 */
530 INNODB_MAX_KEY_PART_LENGTH = 3072;
531 INNODB_MAX_KEY_PARTS = handler->max_key_parts();
532 destroy(handler);
533 plugin_unlock(nullptr, db_plugin);
534 }
535
// Definitions of the cached engine limits. Zero until
// Cache_temp_engine_properties::init() fills them in at server startup.
uint Cache_temp_engine_properties::HEAP_MAX_KEY_LENGTH = 0;
uint Cache_temp_engine_properties::TEMPTABLE_MAX_KEY_LENGTH = 0;
uint Cache_temp_engine_properties::INNODB_MAX_KEY_LENGTH = 0;
uint Cache_temp_engine_properties::HEAP_MAX_KEY_PART_LENGTH = 0;
uint Cache_temp_engine_properties::TEMPTABLE_MAX_KEY_PART_LENGTH = 0;
uint Cache_temp_engine_properties::INNODB_MAX_KEY_PART_LENGTH = 0;
uint Cache_temp_engine_properties::HEAP_MAX_KEY_PARTS = 0;
uint Cache_temp_engine_properties::TEMPTABLE_MAX_KEY_PARTS = 0;
uint Cache_temp_engine_properties::INNODB_MAX_KEY_PARTS = 0;
545
546 /**
547 Initialize the storage engine properties for the alternative temporary table
548 storage engines.
549 */
init_cache_tmp_engine_properties()550 void init_cache_tmp_engine_properties() {
551 DBUG_ASSERT(!current_thd);
552 THD *thd = new THD();
553 thd->thread_stack = pointer_cast<char *>(&thd);
554 thd->store_globals();
555 Cache_temp_engine_properties::init(thd);
556 delete thd;
557 }
558
559 /**
560 Get the minimum of max_key_length/part_length/parts.
561 The minimum is between HEAP engine and internal_tmp_disk_storage_engine.
562
563 @param[out] max_key_length Minimum of max_key_length
564 @param[out] max_key_part_length Minimum of max_key_part_length
565 @param[out] max_key_parts Minimum of max_key_parts
566 */
567
get_max_key_and_part_length(uint * max_key_length,uint * max_key_part_length,uint * max_key_parts)568 void get_max_key_and_part_length(uint *max_key_length,
569 uint *max_key_part_length,
570 uint *max_key_parts) {
571 // Make sure these cached properties are initialized.
572 DBUG_ASSERT(Cache_temp_engine_properties::HEAP_MAX_KEY_LENGTH);
573
574 *max_key_length =
575 std::min(Cache_temp_engine_properties::HEAP_MAX_KEY_LENGTH,
576 Cache_temp_engine_properties::INNODB_MAX_KEY_LENGTH);
577 *max_key_part_length =
578 std::min(Cache_temp_engine_properties::HEAP_MAX_KEY_PART_LENGTH,
579 Cache_temp_engine_properties::INNODB_MAX_KEY_PART_LENGTH);
580 *max_key_parts = std::min(Cache_temp_engine_properties::HEAP_MAX_KEY_PARTS,
581 Cache_temp_engine_properties::INNODB_MAX_KEY_PARTS);
582 }
583
584 /**
585 Create a temporary name for one field if the field_name is empty.
586
587 @param thd Thread handle
588 @param item Item to name the field after
589 */
590
create_tmp_table_field_tmp_name(THD * thd,Item * item)591 static const char *create_tmp_table_field_tmp_name(THD *thd, Item *item) {
592 StringBuffer<STRING_BUFFER_USUAL_SIZE> field_name;
593 const ulonglong save_bits = thd->variables.option_bits;
594 thd->variables.option_bits &= ~OPTION_QUOTE_SHOW_CREATE;
595 item->print(
596 thd, &field_name,
597 enum_query_type(QT_NO_DEFAULT_DB | QT_SUBSELECT_AS_ONLY_SELECT_NUMBER));
598 thd->variables.option_bits = save_bits;
599 return thd->mem_strdup(field_name.c_ptr_safe());
600 }
601
602 /**
603 Helper function for create_tmp_table().
604
605 Insert a field at the head of the hidden field area.
606
607 @param table Temporary table
608 @param default_field Default value array pointer
609 @param from_field Original field array pointer
610 @param blob_field Array pointer to record fields index of blob type
611 @param field The registed hidden field
612 */
613
register_hidden_field(TABLE * table,Field ** default_field,Field ** from_field,uint * blob_field,Field * field)614 static void register_hidden_field(TABLE *table, Field **default_field,
615 Field **from_field, uint *blob_field,
616 Field *field) {
617 uint i;
618 Field **tmp_field = table->field;
619
620 /* Increase all of registed fields index */
621 for (i = 0; i < table->s->fields; i++)
622 tmp_field[i]->set_field_index(tmp_field[i]->field_index() + 1);
623
624 // Increase the field_index of visible blob field
625 for (i = 0; i < table->s->blob_fields; i++) blob_field[i]++;
626 // Insert field
627 table->field[-1] = field;
628 default_field[-1] = nullptr;
629 from_field[-1] = nullptr;
630 field->table = table;
631 field->orig_table = table;
632 field->set_field_index(0);
633
634 // Keep the field from being expanded by SELECT *.
635 field->set_hidden(dd::Column::enum_hidden_type::HT_HIDDEN_SQL);
636 }
637
638 /**
639 Helper function which evaluates correct TABLE_SHARE::real_row_type
640 for the temporary table.
641 */
set_real_row_type(TABLE * table)642 static void set_real_row_type(TABLE *table) {
643 HA_CREATE_INFO create_info;
644 create_info.row_type = table->s->row_type;
645 create_info.options |=
646 HA_LEX_CREATE_TMP_TABLE | HA_LEX_CREATE_INTERNAL_TMP_TABLE;
647 create_info.table_options = table->s->db_create_options;
648 table->s->real_row_type = table->file->get_real_row_type(&create_info);
649 }
650
651 /**
652 Moves to the end of the 'copy_func' array the elements which contain a
653 reference to an expression of the SELECT list of 'select'.
654 @param[in,out] copy_func array to sort
655 @param select query block to search in.
656 */
sort_copy_func(const SELECT_LEX * select,Func_ptr_array * copy_func)657 static void sort_copy_func(const SELECT_LEX *select,
658 Func_ptr_array *copy_func) {
659 /*
660 In the select->all_fields list, there are hidden elements first, then
661 non-hidden. Non-hidden are those of the SELECT list. Hidden ones are:
662 (a) those of GROUP BY, HAVING, ORDER BY
663 (b) those which have been extracted from higher-level elements (of the
664 SELECT, GROUP BY, etc) by split_sum_func() (when aggregates are
665 involved).
666
667 Note that the clauses in (a) are allowed to reference a non-hidden
668 expression through an alias (e.g. "SELECT a+2 AS x GROUP BY x+3"). The
669 clauses in (b) can reference non-hidden expressions without aliases if they
670 have been generated in a query transformation (for example when transforming
671 an IN subquery to a correlated EXISTS subquery ("(x, y) IN (SELECT expr1,
672 expr2 ...)" -> "EXISTS (SELECT * ... HAVING x = expr1 AND y = expr2 ...").
673
674 Let's go through the process of writing to the tmp table
675 (e.g. end_write(), end_write_group()). We also include here the
676 "pseudo-tmp table" embedded into REF_SLICE_ORDERED_GROUP_BY, used by
677 end_send_group().
678 (1) we switch to the REF_SLICE used to read from that tmp table
679 (2.1) we (copy_fields() part 1) copy some columns from the
680 output of the previous step of execution (e.g. the join's output) to the
681 tmp table
682 (2.2) (specifically for REF_SLICE_ORDERED_GROUP_BY in end_send_group()) we
683 (copy_fields() part 2) evaluate some expressions from the same previous
684 step of execution, with Item_copy::copy(). The mechanism of Item_copy is:
685 * copy() evaluates the expression and caches its value in memory
686 * val_*() returns the cached value;
687 so Item_copy::copy() for "a+2" evaluates "a+2" (using the join's value
688 of "a") and caches the value; then Item_copy::copy() for "x+3" evaluates
689 "x", through Item_ref (because of the alias), that Item_ref points to
690 the Item_copy for "a+2" (does not point to the "a+2" Item_func_plus
691 expression, as we advanced the REF_SLICE to TMP3); copy() on
692 "x+3" thus evaluates the Item_copy for "a+2" which returns the cached value.
693 This way, if "a+2" were rather some non-deterministic expression
694 (e.g. rand()), the logic above does only one evaluation of rand(), which is
695 correct (the two objects "x" and "a+2" in 'fields' thus have equal
696 values).
697 For this to work, the Item_copy for "x" must be copy()d after that
698 of "a+2", so it can use the value cached for "a+2". setup_copy_fields()
699 ensures this by putting Item_copy-s of hidden elements last.
700 (3) We are now done with copy_fields(). Next is copy_funcs(). It
701 is meant to evaluate expressions and store their values into the tmp table.
702 [ note that we could replace Item_copy in (2) with a real one-row tmp
703 table; then end_send_group() could just use copy_funcs() instead of
704 Item_copy: copy_funcs() would store into the tmp table's column which
705 would thus be the storage for the cached value ].
706 Because we advanced the REF_SLICE, when copy_funcs() evaluates an
707 expression which uses Item_ref, that Item_ref may point to a column of
708 the tmp table. It is thus important that this column has been filled
709 already. So the order of evaluation of expressions by copy_funcs() must
710 respect "dependencies".
711
712 It is incorrect to evaluate elements of (a) first if they refer to
713 non-hidden elements through aliases. It is incorrect to evaluate elements of
714 (b) first if they refer to non-hidden elements. So, we partition the
715 elements below, moving to the end the ones which reference other expressions
716 in the same query block. We use a stable partitioning
717 (std::stable_partition), to avoid disturbing any dependency already
718 reflected in the order.
719
720 A simpler and more robust solution would be to break the design that
721 hidden elements are always first in SELECT_LEX::all_fields: references
722 using aliases (in GROUP BY, HAVING, ORDER BY) would be added to
723 all_fields last (after the SELECT list); an inner element (split by
724 split_sum_func) would be added right before its containing element. That
725 would reflect dependencies naturally. But it is hard to implement, as
726 some code relies on the fact that non-hidden elements are last, and
727 other code relies on the fact that SELECT::fields is just a part of
728 SELECT::all_fields (i.e. they share 'next' pointers, in the
729 implementation).
730
731 You may wonder why setup_copy_fields() can solve the dependency problem
732 by putting all hidden elements last, while for the copy_func array we
733 have a (more complex) sort. It's because setup_copy_fields() is for
734 end_send_group() which handles only queries with GROUP BY without ORDER
735 BY, window functions or DISTINCT. So the hidden elements produced by
736 split_sum_func are only group aggregates (not anything from WFs), which
737 setup_copy_fields() ignores: these aggregates are thus not cached
738 (neither in Item_copy, nor in a further tmp table's row as there's no tmp
739 table); so any parent item which references them,
740 if evaluated, will reach to the aggregate, not to any cache
741 materializing the aggregate, so will get an up-to-date value.
742 Whereas with window functions, it's possible to have a hidden element be an
743 aggregate (produced by split_sum_func) _and_ be materialized (into a
744 further tmp table), so we cannot ignore such Item anymore: we have to
745 leave it at the beginning of the copy_func array. Except if it contains
746 an alias to an expression of the SELECT list: in that case, the sorting
747 will move it to the end, but will also move the aliased expression, and
748 their relative order will remain unchanged thanks to stable_partition, so
749 their evaluation will be in the right order.
750
751 So we walk each item to copy, put the ones that don't reference other
752 expressions in the query block first, and put those that reference other
753 expressions last.
754 */
755 const auto without_reference_to_select_expr = [select](const Func_ptr &ptr) {
756 // We cast 'const' away, but the walker will not modify '*select'.
757 uchar *walk_arg = const_cast<uchar *>(pointer_cast<const uchar *>(select));
758 return !ptr.func()->walk(&Item::references_select_expr_of,
759 // the reference might be in a subquery
760 enum_walk::SUBQUERY_PREFIX, walk_arg);
761 };
762 std::stable_partition(copy_func->begin(), copy_func->end(),
763 without_reference_to_select_expr);
764 }
765
766 /**
767 Helper function for create_tmp_table_* family for setting tmp table fields
768 to their place in record buffer
769
770 @param field field to set
771 @param pos field's position in table's record buffer
772 @param null_flags beginning of table's null bits buffer
773 @param null_count field's null bit in null bits buffer
774 */
775
relocate_field(Field * field,uchar * pos,uchar * null_flags,uint * null_count)776 inline void relocate_field(Field *field, uchar *pos, uchar *null_flags,
777 uint *null_count) {
778 if (!field->is_flag_set(NOT_NULL_FLAG)) {
779 field->move_field(pos, null_flags + *null_count / 8,
780 (uint8)1 << (*null_count & 7));
781 (*null_count)++;
782 } else
783 field->move_field(pos, nullptr, 0);
784 if (field->type() == MYSQL_TYPE_BIT) {
785 /* We have to reserve place for extra bits among null bits */
786 ((Field_bit *)field)
787 ->set_bit_ptr(null_flags + *null_count / 8, *null_count & 7);
788 (*null_count) += (field->field_length & 7);
789 }
790 field->reset();
791 }
792
/**
  Create a temp table according to a field list.

  Given field pointers are changed to point at tmp_table for
  send_result_set_metadata. The table object is self contained: it's
  allocated in its own memory root, as well as Field objects
  created for table columns. Those Field objects are common to TABLE and
  TABLE_SHARE.
  This function will replace Item_sum items in 'fields' list with
  corresponding Item_field items, pointing at the fields in the
  temporary table, unless save_sum_fields is set to false.
  The Item_field objects are created in THD memory root.

  @param thd             thread handle
  @param param           a description used as input to create the table
  @param fields          list of items that will be used to define
                         column types of the table (also see NOTES)
  @param group           Group key to use for temporary table, NULL if
                         none
  @param distinct        should table rows be distinct
  @param save_sum_fields see NOTES
  @param select_options  the query's select options; TMP_TABLE_ALL_COLUMNS
                         forces every item of 'fields' to be stored, and the
                         full option set is forwarded to
                         setup_tmp_table_handler()
  @param rows_limit      upper bound on the number of rows to materialize;
                         caps share->max_rows and is copied to
                         param->end_write_records
  @param table_alias     possible name of the temporary table that can
                         be used for name resolving; can be "".

  @returns the created table, or nullptr on error (out of memory, or
           failure to set up / instantiate the storage engine).

  @remark mysql_create_view() checks that views have less than
          MAX_FIELDS columns.

  @remark We may actually end up with a table without any columns at all.
          See comment below: We don't have to store this.
*/

#define STRING_TOTAL_LENGTH_TO_PACK_ROWS 128
#define AVG_STRING_LENGTH_TO_PACK_ROWS 64
#define RATIO_TO_PACK_ROWS 2

TABLE *create_tmp_table(THD *thd, Temp_table_param *param, List<Item> &fields,
                        ORDER *group, bool distinct, bool save_sum_fields,
                        ulonglong select_options, ha_rows rows_limit,
                        const char *table_alias) {
  DBUG_TRACE;
  if (!param->allow_group_via_temp_table)
    group = nullptr;  // Can't use group key

  if (group != nullptr) distinct = false;  // Can't use distinct

  for (ORDER *tmp = group; tmp; tmp = tmp->next) {
    /*
      marker == MARKER_BIT means two things:
      - store NULLs in the key, and
      - convert BIT fields to 64-bit long, needed because MEMORY tables
        can't index BIT fields.
    */
    (*tmp->item)->marker = Item::MARKER_BIT;
  }

  /**
    When true, enforces unique constraint (by adding a hidden hash_field and
    creating a key over this field) when:
    (1) unique key is too long, or
    (2) number of key parts in distinct key is too big, or
    (3) the caller has requested it.
  */
  bool unique_constraint_via_hash_field = false;

  /*
    When loose index scan is employed as access method, it already
    computes all groups and the result of all aggregate functions. We
    make space for the items of the aggregate function in the list of
    functions Temp_table_param::items_to_copy, so that the values of
    these items are stored in the temporary table.
  */
  uint copy_func_count = param->func_count;
  if (param->precomputed_group_by) copy_func_count += param->sum_func_count;
  /* Treat sum functions as normal ones when loose index scan is used. */
  save_sum_fields |= param->precomputed_group_by;

  // 4096 since (sizeof(TABLE) + sizeof(TABLE_SHARE) ~= 3KB)
  MEM_ROOT own_root(key_memory_TABLE, 4096);

  param->keyinfo = static_cast<KEY *>(own_root.Alloc(sizeof(*param->keyinfo)));

  const uint field_count =
      param->field_count + param->func_count + param->sum_func_count;
  try {
    param->copy_fields.reserve(field_count);
  } catch (std::bad_alloc &) {
    return nullptr;
  }

  TABLE_SHARE *share = new (&own_root) TABLE_SHARE;
  TABLE *table = new (&own_root) TABLE;
  if (table == nullptr || share == nullptr) return nullptr;

  // NOTE: reg_field/default_field/from_field correspond 1:1 to each other,
  // except that reg_field contains an extra nullptr marker at the end.
  // (They should have been a struct, but we cannot, since the reg_field
  // array ends up in the TABLE object, which expects a flat array.)
  // blob_field is a separate array, which indexes into these.
  Field **reg_field = own_root.ArrayAlloc<Field *>(field_count + 2);
  Field **default_field = own_root.ArrayAlloc<Field *>(field_count + 1);
  Field **from_field = own_root.ArrayAlloc<Field *>(field_count + 1);
  uint *blob_field = own_root.ArrayAlloc<uint>(field_count + 2);
  if (reg_field == nullptr || default_field == nullptr ||
      from_field == nullptr || blob_field == nullptr)
    return nullptr;
  memset(reg_field, 0, sizeof(Field *) * (field_count + 2));
  memset(default_field, 0, sizeof(Field *) * (field_count + 1));
  memset(from_field, 0, sizeof(Field *) * (field_count + 1));

  // Leave the first place to be prepared for hash_field
  // (the arrays are walked back down again below if a hash field is added).
  reg_field++;
  default_field++;
  from_field++;
  table->init_tmp_table(thd, share, &own_root, param->table_charset,
                        table_alias, reg_field, blob_field, false);

  // On any early return below, the guard tears the table down again;
  // it is committed just before the successful return.
  auto free_tmp_table_guard =
      create_scope_guard([thd, table] { free_tmp_table(thd, table); });

  /*
    We will use TABLE_SHARE's MEM_ROOT for all allocations, so TABLE's
    MEM_ROOT remains uninitialized.
    TABLE_SHARE's MEM_ROOT is a copy of own_root, upon error free_tmp_table()
    will free it.
  */
  Swap_mem_root_guard mem_root_guard(thd, &share->mem_root);

  param->items_to_copy =
      new (&share->mem_root) Func_ptr_array(&share->mem_root);
  if (param->items_to_copy == nullptr) return nullptr; /* purecov: inspected */
  if (param->items_to_copy->reserve(copy_func_count)) return nullptr;

  if (param->schema_table) share->db = INFORMATION_SCHEMA_NAME;

  /* Calculate which type of fields we will store in the temporary table */

  share->reclength = 0;
  ulong string_total_length = 0;
  ulong distinct_key_length = 0;
  uint null_count = 0;
  uint hidden_null_count = 0;
  share->blob_fields = 0;
  uint group_null_items = 0;
  uint string_count = 0;
  uint fieldnr = 0;
  param->using_outer_summary_function = false;
  long hidden_field_count = param->hidden_field_count;
  const bool not_all_columns = !(select_options & TMP_TABLE_ALL_COLUMNS);
  /*
    total_uneven_bit_length is uneven bit length for visible fields
    hidden_uneven_bit_length is uneven bit length for hidden fields
  */
  uint total_uneven_bit_length = 0;
  uint hidden_uneven_bit_length = 0;

  // First pass over 'fields': decide which items get a column, and create
  // the Field objects (and size bookkeeping) for them.
  for (Item &refitem : fields) {
    Item *item = &refitem;
    Item::Type type = item->type();
    const bool is_sum_func =
        type == Item::SUM_FUNC_ITEM && !item->m_is_window_function;

    if (type == Item::COPY_STR_ITEM) {
      item = down_cast<Item_copy *>(item)->get_item();
      type = item->type();
    }

    bool store_column = true;
    if (not_all_columns) {
      if (item->has_aggregation() && type != Item::SUM_FUNC_ITEM) {
        if (item->used_tables() & OUTER_REF_TABLE_BIT)
          item->update_used_tables();
        if (type == Item::SUBSELECT_ITEM ||
            (item->used_tables() & ~OUTER_REF_TABLE_BIT)) {
          /*
            Mark that we have ignored an item that refers to a summary
            function. We need to know this if someone is going to use
            DISTINCT on the result.
          */
          param->using_outer_summary_function = true;
          store_column = false;
        }
      } else if (item->m_is_window_function) {
        if (!param->m_window || param->m_window_frame_buffer) {
          /*
            A pre-windowing table; no point in storing WF.
            Or a window's frame buffer:
            - the window's WFs cannot be calculated yet
            - same for later windows' WFs
            - previous windows' WFs are already replaced with Item_field (so
              don't come here).
          */
          store_column = false;
        } else if (param->m_window != down_cast<Item_sum *>(item)->window()) {
          // A later window's WF: no point in storing it in this table.
          store_column = false;
        }
      } else if (item->has_wf()) {
        /*
          A non-WF expression containing a WF conservatively requires all
          windows to have been processed, and is not stored in any of
          windowing tables until the last one.
        */
        if (param->m_window == nullptr || !param->m_window->is_last())
          store_column = false;
      }
      if (item->const_item() && hidden_field_count <= 0)
        continue;  // We don't have to store this
    }

    if (store_column && is_sum_func && !group &&
        !save_sum_fields) { /* Can't calc group yet */
      // Store the aggregate's non-constant arguments instead of the
      // aggregate itself, and repoint the aggregate at the new columns.
      Item_sum *sum_item = down_cast<Item_sum *>(item);
      for (uint i = 0; i < sum_item->get_arg_count(); i++) {
        DBUG_ASSERT(!distinct);
        Item *arg = sum_item->get_arg(i);
        if (!arg->const_item()) {
          Field *new_field = create_tmp_field(
              thd, table, arg, arg->type(), param->items_to_copy,
              &from_field[fieldnr], &default_field[fieldnr], group != nullptr,
              not_all_columns, false, false, false);
          if (new_field == nullptr) return nullptr;  // Should be OOM
          new_field->set_field_index(fieldnr);
          reg_field[fieldnr++] = new_field;
          share->reclength += new_field->pack_length();
          if (new_field->is_flag_set(BLOB_FLAG)) {
            *blob_field++ = new_field->field_index();
            share->blob_fields++;
          }
          if (new_field->type() == MYSQL_TYPE_BIT)
            total_uneven_bit_length += new_field->field_length & 7;
          if (new_field->real_type() == MYSQL_TYPE_STRING ||
              new_field->real_type() == MYSQL_TYPE_VARCHAR) {
            string_count++;
            string_total_length += new_field->pack_length();
          }

          // The replacement Item_field must live in the caller's mem_root,
          // not in the tmp table share's, so swap roots temporarily.
          thd->mem_root = mem_root_guard.old_mem_root();
          arg = sum_item->set_arg(i, thd, new Item_field(new_field));
          thd->mem_root = &share->mem_root;

          if (!new_field->is_flag_set(NOT_NULL_FLAG)) {
            null_count++;
            /*
              new_field->maybe_null() is still false, it will be
              changed below. But we have to setup Item_field correctly
            */
            arg->maybe_null = true;
          }
          /* InnoDB temp table doesn't allow field with empty_name */
          if (!new_field->field_name)
            new_field->field_name = create_tmp_table_field_tmp_name(thd, item);
        }
      }
    } else if (store_column) {
      Field *new_field;
      if (param->schema_table) {
        new_field = create_tmp_field_for_schema(item, table);
      } else {
        /*
          Parameters of create_tmp_field():

          (1) is a bit tricky:
          We need to set it to 0 in union, to get fill_record() to modify the
          temporary table.
          We need to set it to 1 on multi-table-update and in select to
          write rows to the temporary table.
          We here distinguish between UNION and multi-table-updates by the fact
          that in the later case group is set to the row pointer.
          (2) If item->marker == MARKER_BIT then we force create_tmp_field
          to create a 64-bit longs for BIT fields because HEAP
          tables can't index BIT fields directly. We do the same
          for distinct, as we want the distinct index to be
          usable in this case too.
          (3) This is the OUT table of windowing, there is a frame buffer, and
          the item is an expression which can store its value in a result_field
          (e.g. it is Item_func). In that case we pass copy_result_field=true.
        */
        new_field = create_tmp_field(
            thd, table, item, type, param->items_to_copy, &from_field[fieldnr],
            &default_field[fieldnr],
            group != nullptr,  // (1)
            !param->force_copy_fields && (not_all_columns || group != nullptr),
            item->marker == Item::MARKER_BIT ||
                param->bit_fields_as_long,  //(2)
            param->force_copy_fields,
            (param->m_window &&  // (3)
             param->m_window->frame_buffer_param() && item->is_result_field()));
      }

      if (!new_field) {
        DBUG_ASSERT(thd->is_fatal_error());
        return nullptr;  // Got OOM
      }
      /*
        Some group aggregate function use result_field to maintain their
        current value (e.g. Item_avg_field stores both count and sum there).
        But only for the group-by table. So do not set result_field if this is
        a tmp table for UNION or derived table materialization.
      */
      if (not_all_columns && type == Item::SUM_FUNC_ITEM)
        down_cast<Item_sum *>(item)->set_result_field(new_field);
      share->reclength += new_field->pack_length();
      if (!new_field->is_flag_set(NOT_NULL_FLAG)) null_count++;
      if (new_field->type() == MYSQL_TYPE_BIT)
        total_uneven_bit_length += new_field->field_length & 7;
      if (new_field->is_flag_set(BLOB_FLAG)) {
        *blob_field++ = fieldnr;
        share->blob_fields++;
      }

      if (new_field->real_type() == MYSQL_TYPE_STRING ||
          new_field->real_type() == MYSQL_TYPE_VARCHAR) {
        string_count++;
        string_total_length += new_field->pack_length();
      }
      // In order to reduce footprint ask SE to pack variable-length fields.
      if (new_field->type() == MYSQL_TYPE_VAR_STRING ||
          new_field->type() == MYSQL_TYPE_VARCHAR)
        table->s->db_create_options |= HA_OPTION_PACK_RECORD;

      if (item->marker == Item::MARKER_BIT && item->maybe_null) {
        group_null_items++;
        new_field->set_flag(GROUP_FLAG);
      }
      new_field->set_field_index(fieldnr);
      reg_field[fieldnr++] = new_field;
      /* InnoDB temp table doesn't allow field with empty_name */
      if (!new_field->field_name) {
        new_field->field_name = create_tmp_table_field_tmp_name(thd, item);
      }

      /*
        Calculate length of distinct key. The goal is to decide what to use -
        key or unique constraint. As blobs force unique constraint on their
        own due to their length, they aren't taken into account.
      */
      if (distinct && hidden_field_count <= 0) {
        if (new_field->is_flag_set(BLOB_FLAG))
          unique_constraint_via_hash_field = true;
        else
          distinct_key_length += new_field->pack_length();
      }
    }

    hidden_field_count--;
    if (hidden_field_count == 0) {
      /*
        This was the last hidden field; Remember how many hidden fields could
        have null
      */
      hidden_null_count = null_count;
      /*
        We need to update hidden_field_count as we may have stored group
        functions with constant arguments
      */
      param->hidden_field_count = fieldnr;
      null_count = 0;
      /*
        On last hidden field we store uneven bit length in
        hidden_uneven_bit_length and proceed calculation of
        uneven bits for visible fields into
        total_uneven_bit_length variable.
      */
      hidden_uneven_bit_length = total_uneven_bit_length;
      total_uneven_bit_length = 0;
    }
  }  // end of for

  DBUG_ASSERT(field_count >= fieldnr);

  reg_field[fieldnr] = nullptr;
  *blob_field = 0;  // End marker
  share->fields = fieldnr;

  /*
    Different temp table engine supports different max_key_length
    and max_key_part_length. If HEAP engine is selected, it can be
    possible to convert into on-disk engine later. We must choose
    the minimal of max_key_length and max_key_part_length between
    HEAP engine and possible on-disk engine to verify whether unique
    constraint is needed so that the conversion goes well.
  */
  uint max_key_length;
  uint max_key_part_length;
  uint max_key_parts;
  get_max_key_and_part_length(&max_key_length, &max_key_part_length,
                              &max_key_parts);

  // Decide on and describe the key: group key, distinct key, or (decided
  // further below) a hash-field-based unique constraint.
  if (group) {
    DBUG_PRINT("info", ("Creating group key in temporary table"));
    table->group = group; /* Table is grouped by key */
    share->keys = 1;
    // Let each group expression know the column which materializes its value
    for (ORDER *cur_group = group; cur_group; cur_group = cur_group->next) {
      Field *field = (*cur_group->item)->get_tmp_table_field();
      DBUG_ASSERT(field->table == table);
      cur_group->field_in_tmp_table = field;

      if ((*cur_group->item)->max_char_length() > CONVERT_IF_BIGGER_TO_BLOB)
        unique_constraint_via_hash_field = true;
    }
    if (param->group_parts > max_key_parts ||
        param->group_length > max_key_length ||
        param->group_length >= MAX_BLOB_WIDTH)
      unique_constraint_via_hash_field = true;
    // Use key definition created below only if the key isn't too long.
    // Otherwise a dedicated key over a hash value will be created and this
    // definition will be used by server to calc hash.
    if (!unique_constraint_via_hash_field) {
      param->keyinfo->table = table;
      param->keyinfo->is_visible = true;
      KEY_PART_INFO *key_part_info =
          share->mem_root.ArrayAlloc<KEY_PART_INFO>(param->group_parts + 1);
      if (key_part_info == nullptr) return nullptr;
      param->keyinfo->key_part = key_part_info;
      param->keyinfo->flags = HA_NOSAME;
      param->keyinfo->actual_flags = param->keyinfo->flags;
      param->keyinfo->usable_key_parts = param->group_parts;
      param->keyinfo->user_defined_key_parts = param->group_parts;
      param->keyinfo->actual_key_parts = param->keyinfo->user_defined_key_parts;
      param->keyinfo->rec_per_key = nullptr;
      // keyinfo->algorithm is set later, when storage engine is known
      param->keyinfo->set_rec_per_key_array(nullptr, nullptr);
      param->keyinfo->set_in_memory_estimate(IN_MEMORY_ESTIMATE_UNKNOWN);
      param->keyinfo->name = "<group_key>";
      for (ORDER *cur_group = group; cur_group;
           cur_group = cur_group->next, key_part_info++) {
        Field *field = cur_group->field_in_tmp_table;
        key_part_info->init_from_field(field);

        /* In GROUP BY 'a' and 'a ' are equal for VARCHAR fields */
        key_part_info->key_part_flag |= HA_END_SPACE_ARE_EQUAL;

        if (key_part_info->store_length > max_key_part_length) {
          unique_constraint_via_hash_field = true;
          break;
        }
      }
      table->key_info = param->keyinfo;
      share->key_info = param->keyinfo;
      share->key_parts = param->keyinfo->user_defined_key_parts;
    }
  } else if (distinct && share->fields != param->hidden_field_count) {
    /*
      Create an unique key or an unique constraint over all columns
      that should be in the result. In the temporary table, there are
      'param->hidden_field_count' extra columns, whose null bits are stored
      in the first 'hidden_null_pack_length' bytes of the row.
    */
    DBUG_PRINT("info", ("hidden_field_count: %d", param->hidden_field_count));
    share->keys = 1;
    table->is_distinct = true;
    if (!unique_constraint_via_hash_field) {
      param->keyinfo->table = table;
      param->keyinfo->is_visible = true;
      param->keyinfo->user_defined_key_parts =
          share->fields - param->hidden_field_count;
      param->keyinfo->actual_key_parts = param->keyinfo->user_defined_key_parts;
      KEY_PART_INFO *key_part_info = share->mem_root.ArrayAlloc<KEY_PART_INFO>(
          param->keyinfo->user_defined_key_parts);
      if (key_part_info == nullptr) return nullptr;
      param->keyinfo->key_part = key_part_info;
      param->keyinfo->flags = HA_NOSAME | HA_NULL_ARE_EQUAL;
      param->keyinfo->actual_flags = param->keyinfo->flags;
      param->keyinfo->name = "<auto_distinct_key>";
      // keyinfo->algorithm is set later, when storage engine is known
      param->keyinfo->set_in_memory_estimate(IN_MEMORY_ESTIMATE_UNKNOWN);

      // Set up records-per-key estimates.
      ulong *rec_per_key = share->mem_root.ArrayAlloc<ulong>(
          param->keyinfo->user_defined_key_parts);
      rec_per_key_t *rec_per_key_float =
          share->mem_root.ArrayAlloc<rec_per_key_t>(
              param->keyinfo->user_defined_key_parts);
      if (rec_per_key == nullptr || rec_per_key_float == nullptr)
        return nullptr;
      param->keyinfo->set_rec_per_key_array(rec_per_key, rec_per_key_float);
      for (unsigned key_part_idx = 0;
           key_part_idx < param->keyinfo->user_defined_key_parts;
           ++key_part_idx) {
        param->keyinfo->rec_per_key[key_part_idx] = 0;
        param->keyinfo->set_records_per_key(key_part_idx, REC_PER_KEY_UNKNOWN);
      }

      /* Create a distinct key over the columns we are going to return */
      for (unsigned i = param->hidden_field_count; i < share->fields;
           i++, key_part_info++) {
        key_part_info->init_from_field(table->field[i]);
        if (key_part_info->store_length > max_key_part_length) {
          unique_constraint_via_hash_field = true;
          break;
        }
      }
      table->key_info = param->keyinfo;
      share->key_info = param->keyinfo;
      share->key_parts = param->keyinfo->user_defined_key_parts;
    }
  }

  /*
    To enforce unique constraint we need to add a field to hold key's hash
    A1) distinct key is too long
    A2) number of keyparts in distinct key is too big
    A3) caller cannot accept distinct via indexes (e.g. because it wants
        to turn off the checking at some point)
  */
  if (distinct) {
    if (distinct_key_length > max_key_length ||                   // 1
        (fieldnr - param->hidden_field_count) > max_key_parts ||  // 2
        param->force_hash_field_for_unique) {                     // 3
      unique_constraint_via_hash_field = true;
    }
  }

  if (unique_constraint_via_hash_field) {
    Field_longlong *field = new (&share->mem_root)
        Field_longlong(sizeof(ulonglong), false, "<hash_field>", true);
    if (!field) {
      /* purecov: begin inspected */
      DBUG_ASSERT(thd->is_fatal_error());
      return nullptr;  // Got OOM
      /* purecov: end */
    }

    // Mark hash_field as NOT NULL
    field->set_flag(NOT_NULL_FLAG);
    // Register hash_field as a hidden field.
    register_hidden_field(table, &default_field[0], &from_field[0],
                          share->blob_field, field);
    // Repoint arrays (undo the "+1" offsets applied at allocation time,
    // so that index 0 is the hash field).
    table->field--;
    default_field--;
    from_field--;
    share->reclength += field->pack_length();
    share->fields = ++fieldnr;
    param->hidden_field_count++;
    share->field--;
    table->hash_field = field;
  }

  if (setup_tmp_table_handler(table, select_options, false,
                              param->schema_table))
    return nullptr; /* purecov: inspected */

  if (table->s->keys == 1 && table->key_info)
    table->key_info->algorithm = table->file->get_default_index_algorithm();

  table->hidden_field_count = param->hidden_field_count;

  if (!unique_constraint_via_hash_field)
    share->reclength += group_null_items;  // null flag is stored separately

  if (share->blob_fields == 0) {
    /* We need to ensure that first byte is not 0 for the delete link */
    if (param->hidden_field_count)
      hidden_null_count++;
    else
      null_count++;
  }
  uint hidden_null_pack_length =
      (hidden_null_count + 7 + hidden_uneven_bit_length) / 8;
  share->null_bytes = (hidden_null_pack_length +
                       (null_count + total_uneven_bit_length + 7) / 8);
  share->reclength += share->null_bytes;
  if (share->reclength == 0) share->reclength = 1;  // Dummy select

  share->null_fields = null_count + hidden_null_count;

  if (alloc_record_buffers(table)) return nullptr;

  // Second pass: lay out every field in record[0] (data area starts after
  // the null-bits area).
  uchar *pos = table->record[0] + share->null_bytes;
  // Bit 0 was reserved above for the delete link when there are no blob
  // fields, so the null-bit counter restarts at 1 in that case.
  null_count = (share->blob_fields == 0) ? 1 : 0;
  hidden_field_count = param->hidden_field_count;
  DBUG_ASSERT((uint)hidden_field_count <= share->fields);
  for (uint i = 0; i < share->fields; i++) {
    Field *field = table->field[i];

    if (!field->is_flag_set(NOT_NULL_FLAG)) {
      if (field->is_flag_set(GROUP_FLAG) && !unique_constraint_via_hash_field) {
        /*
          We have to reserve one byte here for NULL bits,
          as this is updated by 'end_update()'
        */
        *pos++ = 0;  // Null is stored here
      }
    }
    relocate_field(field, pos, table->record[0], &null_count);
    pos += field->pack_length();
    if (!--hidden_field_count)
      null_count = (null_count + 7) & ~7;  // move to next byte
  }

  /* Use packed rows if there is blobs or a lot of space to gain */
  bool use_packed_rows = false;
  if (share->blob_fields != 0 ||
      (string_total_length >= STRING_TOTAL_LENGTH_TO_PACK_ROWS &&
       (share->reclength / string_total_length <= RATIO_TO_PACK_ROWS ||
        string_total_length / string_count >= AVG_STRING_LENGTH_TO_PACK_ROWS)))
    use_packed_rows = true;

  if (!use_packed_rows) share->db_create_options &= ~HA_OPTION_PACK_RECORD;

  param->func_count = param->items_to_copy->size();
  DBUG_ASSERT(param->func_count <= copy_func_count);  // Used <= allocated
  sort_copy_func(thd->lex->current_select(), param->items_to_copy);
  uchar *bitmaps = static_cast<uchar *>(
      share->mem_root.Alloc(bitmap_buffer_size(field_count + 1) * 3));
  if (bitmaps == nullptr) return nullptr;
  setup_tmp_table_column_bitmaps(table, bitmaps);

  // Third pass: copy default values, register copy_fields entries and fix
  // up per-field table name pointers.
  for (uint i = 0; i < share->fields; i++) {
    Field *field = table->field[i];
    /*
      Test if there is a default field value. The test for ->ptr is to skip
      'offset' fields generated by initalize_tables
    */
    if (default_field[i] && default_field[i]->field_ptr() != nullptr) {
      /*
        default_field[i] is set only in the cases when 'field' can
        inherit the default value that is defined for the field referred
        by the Item_field object from which 'field' has been created.
      */
      Field *orig_field = default_field[i];
      /*
        Get the value from default_values. Note that orig_field->ptr might not
        point into record[0] if previous step is REF_SLICE_ORDERED_GROUP_BY and
        we are creating a tmp table to materialize the query's result.
      */
      ptrdiff_t diff = orig_field->table->default_values_offset();
      Field *f_in_record0 = orig_field->table->field[orig_field->field_index()];
      if (f_in_record0->is_real_null(diff))
        field->set_null();
      else {
        field->set_notnull();
        memcpy(field->field_ptr(), f_in_record0->field_ptr() + diff,
               field->pack_length());
      }
    }

    if (from_field[i]) {
      /* This column is directly mapped to a column in the GROUP BY clause. */
      if (param->m_window && param->m_window->frame_buffer_param() &&
          field->is_flag_set(FIELD_IS_MARKED)) {
        Temp_table_param *window_fb = param->m_window->frame_buffer_param();
        // Grep for FIELD_IS_MARKED in this file.
        // NOTE(review): the enclosing if() already checked
        // FIELD_IS_MARKED, so this ternary always takes the clear_flag()
        // branch here; it is written as a toggle.
        field->is_flag_set(FIELD_IS_MARKED) ? field->clear_flag(FIELD_IS_MARKED)
                                            : field->set_flag(FIELD_IS_MARKED);
        window_fb->copy_fields.emplace_back(from_field[i], field,
                                            save_sum_fields);
      } else {
        param->copy_fields.emplace_back(field, from_field[i], save_sum_fields);
      }
    }

    // fix table name in field entry
    field->table_name = &table->alias;
  }

  store_record(table, s->default_values);  // Make empty default record

  /*
    Push the LIMIT clause to the temporary table creation, so that we
    materialize only up to 'rows_limit' records instead of all result records.
  */
  share->max_rows = std::min(share->max_rows, rows_limit);
  param->end_write_records = rows_limit;

  if (group && !unique_constraint_via_hash_field) {
    if (param->can_use_pk_for_unique) share->primary_key = 0;
    param->keyinfo->key_length = 0;  // Will compute the sum of the parts below.
    /*
      Here, we have to make the group fields point to the right record
      position.
    */
    KEY_PART_INFO *key_part_info = param->keyinfo->key_part;
    param->group_buff = share->mem_root.ArrayAlloc<uchar>(param->group_length);
    if (param->group_buff == nullptr) return nullptr;
    uchar *group_buff = param->group_buff;
    for (ORDER *cur_group = group; cur_group;
         cur_group = cur_group->next, key_part_info++) {
      Field *field = cur_group->field_in_tmp_table;
      const bool maybe_null = (*cur_group->item)->maybe_null;
      key_part_info->init_from_field(key_part_info->field);
      param->keyinfo->key_length += key_part_info->store_length;

      cur_group->buff = pointer_cast<char *>(group_buff);
      cur_group->field_in_tmp_table =
          field->new_key_field(thd->mem_root, table, group_buff + maybe_null);

      if (!cur_group->field_in_tmp_table)
        return nullptr; /* purecov: inspected */

      if (maybe_null) {
        /*
          To be able to group on NULL, we reserved place in group_buff
          for the NULL flag just before the column. (see above).
          The field data is after this flag.
          The NULL flag is updated in 'end_update()' and 'end_write()'
        */
        param->keyinfo->flags |= HA_NULL_ARE_EQUAL;  // def. that NULL == NULL
        cur_group->buff++;                           // Pointer to field data
        group_buff++;                                // Skipp null flag
      }
      group_buff += cur_group->field_in_tmp_table->pack_length();
    }
  }

  if (distinct && share->fields != param->hidden_field_count &&
      !unique_constraint_via_hash_field) {
    if (param->can_use_pk_for_unique) share->primary_key = 0;
    param->keyinfo->key_length = 0;  // Will compute the sum of the parts below.
    /*
      Here, we have to make the key fields point to the right record
      position.
    */
    KEY_PART_INFO *key_part_info = param->keyinfo->key_part;
    for (uint i = param->hidden_field_count; i < share->fields;
         i++, key_part_info++) {
      key_part_info->init_from_field(table->field[i]);
      param->keyinfo->key_length += key_part_info->store_length;
    }
  }

  // Create a key over hash_field to enforce unique constraint
  if (unique_constraint_via_hash_field) {
    KEY *hash_key;
    KEY_PART_INFO *hash_kpi;

    if (!multi_alloc_root(&share->mem_root, &hash_key, sizeof(*hash_key),
                          &hash_kpi, sizeof(*hash_kpi),  // Only one key part
                          NullS))
      return nullptr;
    table->key_info = share->key_info = hash_key;
    share->key_parts = 1;
    hash_key->table = table;
    hash_key->key_part = hash_kpi;
    hash_key->actual_flags = hash_key->flags = HA_NULL_ARE_EQUAL;
    hash_key->actual_key_parts = hash_key->usable_key_parts = 1;
    hash_key->user_defined_key_parts = 1;
    hash_key->set_rec_per_key_array(nullptr, nullptr);
    hash_key->algorithm = table->file->get_default_index_algorithm();
    hash_key->set_in_memory_estimate(IN_MEMORY_ESTIMATE_UNKNOWN);
    if (distinct)
      hash_key->name = "<hash_distinct_key>";
    else
      hash_key->name = "<hash_group_key>";
    hash_kpi->init_from_field(table->hash_field);
    hash_key->key_length = hash_kpi->store_length;
    param->keyinfo = hash_key;
  }

  if (thd->is_fatal_error())  // If end of memory
    return nullptr;           /* purecov: inspected */

  set_real_row_type(table);

  if (!param->skip_create_table) {
    if (instantiate_tmp_table(thd, table)) return nullptr;
  }

  DEBUG_SYNC(thd, "tmp_table_created");

  // Success: disarm the cleanup guard so the table survives this scope.
  free_tmp_table_guard.commit();

  return table;
}
1561
1562 /**
1563 Create a temporary table to weed out duplicate rowid combinations
1564
1565
1566 @param thd Thread handle
1567 @param uniq_tuple_length_arg Length of the table's column
1568 @param sjtbl Update sjtbl->[start_]recinfo values which
1569 will be needed if we'll need to convert the
1570 created temptable from HEAP to MyISAM/Maria.
1571
1572 @details
1573 create_duplicate_weedout_tmp_table()
1574
1575 Create a temporary table to weed out duplicate rowid combinations. The
1576 table has a single column that is a concatenation of all rowids in the
1577 combination.
1578
1579 Depending on the needed length, there are two cases:
1580
1581 1. When the length of the column < max_key_length:
1582
1583 CREATE TABLE tmp (col VARBINARY(n) NOT NULL, UNIQUE KEY(col));
1584
1585 2. Otherwise (not a valid SQL syntax but internally supported):
1586
1587 CREATE TABLE tmp (col VARBINARY NOT NULL, UNIQUE CONSTRAINT(col));
1588
1589 The code in this function was produced by extraction of relevant parts
1590 from create_tmp_table().
1591
1592 @return
1593 created table
1594 NULL on error
1595 */
1596
create_duplicate_weedout_tmp_table(THD * thd,uint uniq_tuple_length_arg,SJ_TMP_TABLE * sjtbl)1597 TABLE *create_duplicate_weedout_tmp_table(THD *thd, uint uniq_tuple_length_arg,
1598 SJ_TMP_TABLE *sjtbl) {
1599 MEM_ROOT *mem_root_save, own_root;
1600 TABLE *table;
1601 TABLE_SHARE *share;
1602 Field **reg_field;
1603 KEY_PART_INFO *key_part_info;
1604 KEY *keyinfo;
1605 uchar *group_buff;
1606 uchar *bitmaps;
1607 uint *blob_field;
1608 bool unique_constraint_via_hash_field = false;
1609 Field *field, *key_field, *hash_field = nullptr;
1610 uint null_pack_length;
1611 uchar *null_flags;
1612 uchar *pos;
1613 uint i;
1614
1615 DBUG_TRACE;
1616 DBUG_ASSERT(!sjtbl || !sjtbl->is_confluent);
1617
1618 DBUG_EXECUTE_IF("create_duplicate_weedout_tmp_table_error", {
1619 my_error(ER_UNKNOWN_ERROR, MYF(0));
1620 return nullptr;
1621 });
1622
1623 /* STEP 1: Figure if we'll be using a key or blob+constraint */
1624 if (uniq_tuple_length_arg > CONVERT_IF_BIGGER_TO_BLOB)
1625 unique_constraint_via_hash_field = true;
1626
1627 /* STEP 2: Allocate memory for temptable description */
1628 init_sql_alloc(key_memory_TABLE, &own_root, TABLE_ALLOC_BLOCK_SIZE, 0);
1629 if (!multi_alloc_root(
1630 &own_root, &table, sizeof(*table), &share, sizeof(*share), ®_field,
1631 sizeof(Field *) * (1 + 2), &blob_field, sizeof(uint) * 3, &keyinfo,
1632 sizeof(*keyinfo), &key_part_info, sizeof(*key_part_info) * 2,
1633 &group_buff,
1634 (!unique_constraint_via_hash_field ? uniq_tuple_length_arg : 0),
1635 &bitmaps, bitmap_buffer_size(1) * 3, NullS)) {
1636 return nullptr;
1637 }
1638
1639 /* STEP 3: Create TABLE description */
1640 new (table) TABLE;
1641 memset(reg_field, 0, sizeof(Field *) * 3);
1642 table->init_tmp_table(thd, share, &own_root, nullptr, "weedout-tmp",
1643 reg_field, blob_field, false);
1644
1645 mem_root_save = thd->mem_root;
1646 thd->mem_root = &share->mem_root;
1647
1648 uint reclength = 0;
1649 uint null_count = 0;
1650
1651 /* Create the field */
1652 if (unique_constraint_via_hash_field) {
1653 Field_longlong *field_ll = new (&share->mem_root)
1654 Field_longlong(sizeof(ulonglong), false, "<hash_field>", true);
1655 if (!field_ll) {
1656 DBUG_ASSERT(thd->is_fatal_error());
1657 goto err; // Got OOM
1658 }
1659 // Mark hash_field as NOT NULL
1660 field_ll->set_flag(NOT_NULL_FLAG);
1661 *(reg_field++) = hash_field = field_ll;
1662 if (sjtbl) sjtbl->hash_field = field_ll;
1663 table->hash_field = field_ll;
1664 field_ll->table = table;
1665 field_ll->orig_table = table;
1666 share->fields++;
1667 field_ll->set_field_index(0);
1668 reclength = field_ll->pack_length();
1669 table->hidden_field_count++;
1670 }
1671 {
1672 /*
1673 For the sake of uniformity, always use Field_varstring (altough we could
1674 use Field_string for shorter keys)
1675 */
1676 field = new (thd->mem_root) Field_varstring(
1677 uniq_tuple_length_arg, false, "rowids", share, &my_charset_bin);
1678 if (!field) return nullptr;
1679 field->table = table;
1680 field->auto_flags = Field::NONE;
1681 field->set_flag(NOT_NULL_FLAG);
1682 field->set_flag(BINARY_FLAG);
1683 field->set_flag(NO_DEFAULT_VALUE_FLAG);
1684 field->init(table);
1685 field->orig_table = nullptr;
1686 *(reg_field++) = field;
1687 *blob_field = 0;
1688 *reg_field = nullptr;
1689
1690 field->set_field_index(share->fields);
1691 share->fields++;
1692 share->blob_fields = 0;
1693 reclength += field->pack_length();
1694 null_count++;
1695 }
1696
1697 /* See also create_tmp_table() */
1698 if (setup_tmp_table_handler(table, 0LL, unique_constraint_via_hash_field,
1699 false))
1700 goto err;
1701
1702 null_pack_length = 1;
1703 reclength += null_pack_length;
1704
1705 share->reclength = reclength;
1706 share->null_bytes = null_pack_length;
1707 share->null_fields = null_count;
1708
1709 if (alloc_record_buffers(table)) goto err;
1710 setup_tmp_table_column_bitmaps(table, bitmaps);
1711
1712 null_flags = table->record[0];
1713
1714 pos = table->record[0] + null_pack_length;
1715 null_count = 1;
1716 for (i = 0, reg_field = table->field; i < share->fields; i++, reg_field++) {
1717 Field *field_r = *reg_field;
1718 uint length;
1719
1720 relocate_field(field_r, pos, null_flags, &null_count);
1721 length = field_r->pack_length();
1722 pos += length;
1723
1724 // fix table name in field entry
1725 field_r->table_name = &table->alias;
1726 }
1727
1728 // Create a key over param->hash_field to enforce unique constraint
1729 if (unique_constraint_via_hash_field) {
1730 KEY *hash_key = keyinfo;
1731 KEY_PART_INFO *hash_kpi = key_part_info;
1732
1733 share->keys = 1;
1734 table->key_info = share->key_info = hash_key;
1735 hash_key->table = table;
1736 hash_key->key_part = hash_kpi;
1737 hash_key->actual_flags = hash_key->flags = HA_NULL_ARE_EQUAL;
1738 hash_kpi->init_from_field(hash_field);
1739 hash_key->key_length = hash_kpi->store_length;
1740 } else {
1741 DBUG_PRINT("info", ("Creating group key in temporary table"));
1742 share->keys = 1;
1743 table->key_info = table->s->key_info = keyinfo;
1744 keyinfo->key_part = key_part_info;
1745 keyinfo->actual_flags = keyinfo->flags = HA_NOSAME;
1746 keyinfo->key_length = 0;
1747 {
1748 key_part_info->init_from_field(field);
1749 key_part_info->bin_cmp = true;
1750
1751 key_field = field->new_key_field(thd->mem_root, table, group_buff);
1752 if (!key_field) goto err;
1753 key_part_info->key_part_flag |= HA_END_SPACE_ARE_EQUAL; // todo need
1754 // this?
1755 keyinfo->key_length += key_part_info->length;
1756 }
1757 }
1758 {
1759 table->key_info->user_defined_key_parts = 1;
1760 table->key_info->usable_key_parts = 1;
1761 table->key_info->actual_key_parts = table->key_info->user_defined_key_parts;
1762 share->key_parts = table->key_info->user_defined_key_parts;
1763 table->key_info->set_rec_per_key_array(nullptr, nullptr);
1764 table->key_info->algorithm = table->file->get_default_index_algorithm();
1765 table->key_info->set_in_memory_estimate(IN_MEMORY_ESTIMATE_UNKNOWN);
1766 table->key_info->name = "weedout_key";
1767 }
1768
1769 if (thd->is_fatal_error()) // If end of memory
1770 goto err;
1771
1772 set_real_row_type(table);
1773
1774 if (instantiate_tmp_table(thd, table)) goto err;
1775
1776 thd->mem_root = mem_root_save;
1777 return table;
1778
1779 err:
1780 thd->mem_root = mem_root_save;
1781 table->file->ha_index_or_rnd_end();
1782 free_tmp_table(thd, table); /* purecov: inspected */
1783 return nullptr; /* purecov: inspected */
1784 }
1785
1786 /****************************************************************************/
1787
1788 /**
1789 Create an, optionally reduced, TABLE object with properly set up Field list
1790 from a list of field definitions.
1791
1792 @details
1793 When is_virtual arg is true:
1794 The created table doesn't have a table handler associated with
1795 it, has no keys, no group/distinct, no copy_funcs array.
1796 The sole purpose of this TABLE object is to use the power of Field
1797 class to read/write data to/from table->record[0]. Then one can store
1798 the record in any container (RB tree, hash, etc).
1799 The table is created in THD mem_root, so are the table's fields.
    Consequently, if you don't have BLOB fields, you don't need to free it.
1801 When is_virtual is false:
1802 This function creates a normal tmp table out of fields' definitions,
    rather than from a list of items. This is the main difference from
1804 create_tmp_table. Also the table created here doesn't do grouping,
1805 doesn't have indexes and copy_funcs/fields. The purpose is to be able to
1806 create result table for table functions out of fields' definitions
1807 without need in intermediate list of items.
1808
1809 @param thd connection handle
1810 @param field_list list of column definitions
1811 @param is_virtual if true, then it's effectively only a record buffer
1812 with wrapper, used e.g to store vars in SP
1813 if false, then a normal table, which can hold
1814 records, is created
1815 @param select_options options for non-virtual tmp table
1816 @param alias table's alias
1817
1818 @return
1819 0 if out of memory, TABLE object in case of success
1820 */
1821
TABLE *create_tmp_table_from_fields(THD *thd, List<Create_field> &field_list,
                                    bool is_virtual, ulonglong select_options,
                                    const char *alias) {
  uint field_count = field_list.elements;
  uint blob_count = 0;
  Field **reg_field;
  Create_field *cdef; /* column definition */
  uint record_length = 0;
  uint null_count = 0;   /* number of columns which may be null */
  uint null_pack_length; /* NULL representation array length */
  uint *blob_field;
  uchar *bitmaps;
  TABLE *table;
  TABLE_SHARE *share;
  MEM_ROOT own_root, *m_root;
  /*
    total_uneven_bit_length is uneven bit length for BIT fields
  */
  uint total_uneven_bit_length = 0;

  // A real tmp table owns a dedicated mem_root (own_root); a virtual table
  // lives entirely on the THD's mem_root and needs no separate lifetime.
  if (!is_virtual) {
    init_sql_alloc(key_memory_TABLE, &own_root, TABLE_ALLOC_BLOCK_SIZE, 0);
    m_root = &own_root;
  } else
    m_root = thd->mem_root;

  // Allocate TABLE, TABLE_SHARE, field array, blob-index array and the three
  // column bitmaps in one shot from the chosen root.
  if (!multi_alloc_root(m_root, &table, sizeof(*table), &share, sizeof(*share),
                        &reg_field, (field_count + 1) * sizeof(Field *),
                        &blob_field, (field_count + 1) * sizeof(uint), &bitmaps,
                        bitmap_buffer_size(field_count) * 3, NullS))
    return nullptr;

  new (table) TABLE;
  new (share) TABLE_SHARE;
  table->init_tmp_table(thd, share, m_root, nullptr, alias, reg_field,
                        blob_field, is_virtual);

  /* Create all fields and calculate the total length of record */
  List_iterator_fast<Create_field> it(field_list);
  uint idx = 0;
  while ((cdef = it++)) {
    // For nullable columns, pass a dummy 1-byte null area for now; real
    // record pointers are attached by relocate_field() further below.
    *reg_field =
        cdef->is_nullable
            ? make_field(*cdef, share, nullptr,
                         pointer_cast<uchar *>(const_cast<char *>("")), 1)
            : make_field(*cdef, share);
    if (!*reg_field) goto error;
    (*reg_field)->init(table);
    record_length += (*reg_field)->pack_length();
    if (!(*reg_field)->is_flag_set(NOT_NULL_FLAG)) null_count++;
    (*reg_field)->set_field_index(idx++);
    // BIT columns store their leftover (non-byte-aligned) bits in the null
    // area; account for them in the null-byte calculation below.
    if ((*reg_field)->type() == MYSQL_TYPE_BIT)
      total_uneven_bit_length += (*reg_field)->field_length & 7;

    if ((*reg_field)->is_flag_set(BLOB_FLAG))
      share->blob_field[blob_count++] = (uint)(reg_field - table->field);

    reg_field++;
  }
  *reg_field = nullptr;              /* mark the end of the list */
  share->blob_field[blob_count] = 0; /* mark the end of the list */
  share->blob_fields = blob_count;

  // One bit per nullable column plus spill-over BIT bits, rounded up to bytes.
  null_pack_length = (null_count + total_uneven_bit_length + 7) / 8;
  share->reclength = record_length + null_pack_length;
  share->null_bytes = null_pack_length;
  share->null_fields = null_count;
  share->fields = field_count;

  if (is_virtual) {
    /*
      When the table is virtual, updates won't be done on the table and
      default values won't be stored. Thus no need to allocate buffers for
      that.
    */
    share->rec_buff_length = ALIGN_SIZE(share->reclength + 1);
    table->record[0] = (uchar *)thd->alloc(share->rec_buff_length);
    if (!table->record[0]) goto error;
    if (null_pack_length) {
      table->null_flags = table->record[0];
      memset(table->record[0], 255, null_pack_length);  // Set null fields
    }
  } else if (alloc_record_buffers(table))
    goto error;

  setup_tmp_table_column_bitmaps(table, bitmaps);

  {
    /* Set up field pointers */
    uchar *null_flags = table->record[0];
    uchar *pos = null_flags + share->null_bytes;
    uint null_counter = 0;

    for (reg_field = table->field; *reg_field; ++reg_field) {
      Field *field = *reg_field;
      relocate_field(field, pos, null_flags, &null_counter);
      pos += field->pack_length();
    }
  }

  // A virtual table needs no storage engine: it is just a record buffer.
  if (is_virtual) return table;

  store_record(table, s->default_values);  // Make empty default record

  if (setup_tmp_table_handler(table, select_options, false, false)) goto error;

  return table;
error:
  // Destroy the Field objects created so far; their memory (and the table's)
  // is on the mem_root(s) and is reclaimed with them.
  for (reg_field = table->field; *reg_field; ++reg_field) destroy(*reg_field);
  return nullptr;
}
1933
1934 /**
1935 Checks if disk storage engine should be used for temporary table.
1936
1937 @param table table to allocate SE for
1938 @param select_options current select's options
1939 @param force_disk_table true <=> Use InnoDB
1940 @param mem_engine Selected in-memory storage engine.
1941
1942 @return
1943 true if disk storage engine should be used
1944 false if disk storage engine is not required
1945 */
use_tmp_disk_storage_engine(TABLE * table,ulonglong select_options,bool force_disk_table,enum_internal_tmp_mem_storage_engine mem_engine)1946 static bool use_tmp_disk_storage_engine(
1947 TABLE *table, ulonglong select_options, bool force_disk_table,
1948 enum_internal_tmp_mem_storage_engine mem_engine) {
1949 THD *thd = table->in_use;
1950 TABLE_SHARE *share = table->s;
1951
1952 /* Caller needs SE to be disk-based (@see create_tmp_table()). */
1953 if (force_disk_table) {
1954 return true;
1955 }
1956
1957 /*
1958 During bootstrap, the heap engine is not available, so we force using
1959 disk storage engine. This is especially hit when creating a I_S system
1960 view definition with a UNION in it AND is also when upgrading from
1961 older DD tables which involves execution of UPDATE queries to adjust
1962 metadata of DD tables.
1963 */
1964 if (opt_initialize || thd->is_dd_system_thread()) {
1965 return true;
1966 }
1967
1968 if (mem_engine == TMP_TABLE_MEMORY) {
1969 /* MEMORY do not support BLOBs */
1970 if (share->blob_fields) {
1971 return true;
1972 }
1973 } else {
1974 DBUG_ASSERT(mem_engine == TMP_TABLE_TEMPTABLE);
1975 }
1976
1977 /* User said the result would be big, so may not fit in memory */
1978 if ((thd->variables.big_tables) && !(select_options & SELECT_SMALL_RESULT)) {
1979 return true;
1980 }
1981
1982 return false;
1983 }
1984
1985 /**
1986 Helper function to create_tmp_table_* family for setting up table's SE
1987
1988 @param table table to allocate SE for
1989 @param select_options current select's options
1990 @param force_disk_table true <=> Use InnoDB
1991 @param schema_table whether the table is a schema table
1992
1993 @returns
1994 false on success
1995 true otherwise
1996 */
setup_tmp_table_handler(TABLE * table,ulonglong select_options,bool force_disk_table,bool schema_table)1997 static bool setup_tmp_table_handler(TABLE *table, ulonglong select_options,
1998 bool force_disk_table, bool schema_table) {
1999 THD *thd = table->in_use;
2000
2001 TABLE_SHARE *share = table->s;
2002 enum_internal_tmp_mem_storage_engine mem_engine =
2003 static_cast<enum_internal_tmp_mem_storage_engine>(
2004 thd->variables.internal_tmp_mem_storage_engine);
2005
2006 /* Except for special conditions, tmp table engine will be chosen by user. */
2007
2008 /* For information_schema tables we use the Heap engine because we do
2009 not allow user-created TempTable tables and even though information_schema
2010 tables are not user-created, an ingenious user may execute:
2011 CREATE TABLE myowntemptabletable LIKE information_schema.some; */
2012 if (schema_table && (mem_engine == TMP_TABLE_TEMPTABLE)) {
2013 mem_engine = TMP_TABLE_MEMORY;
2014 }
2015
2016 if (use_tmp_disk_storage_engine(table, select_options, force_disk_table,
2017 mem_engine)) {
2018 share->db_plugin = ha_lock_engine(nullptr, innodb_hton);
2019 } else {
2020 share->db_plugin = nullptr;
2021 switch (mem_engine) {
2022 case TMP_TABLE_TEMPTABLE:
2023 share->db_plugin = ha_lock_engine(nullptr, temptable_hton);
2024 break;
2025 case TMP_TABLE_MEMORY:
2026 share->db_plugin = ha_lock_engine(nullptr, heap_hton);
2027 break;
2028 }
2029 DBUG_ASSERT(share->db_plugin != nullptr);
2030 }
2031
2032 if (!(table->file =
2033 get_new_handler(share, false, &share->mem_root, share->db_type())))
2034 return true;
2035
2036 // Update the handler with information about the table object
2037 table->file->change_table_ptr(table, share);
2038 if (table->file->set_ha_share_ref(&share->ha_share)) {
2039 destroy(table->file);
2040 return true;
2041 }
2042
2043 // Initialize cost model for this table
2044 table->init_cost_model(thd->cost_model());
2045
2046 return false;
2047 }
2048
2049 /**
2050 Helper function for create_tmp_table_* family for allocating record buffers
2051
2052 @note Caller must initialize TABLE_SHARE::reclength and
2053 TABLE_SHARE::null_bytes before calling this function.
2054
2055 @param table table to allocate record buffers for
2056
2057 @returns
2058 false on success
2059 true otherwise
2060 */
2061
alloc_record_buffers(TABLE * table)2062 static bool alloc_record_buffers(TABLE *table) {
2063 TABLE_SHARE *share = table->s;
2064 THD *thd = table->in_use;
2065 /*
2066 Same as MI_UNIQUE_HASH_LENGTH,
2067 allows to exclude "myisam.h" from include files.
2068 */
2069 const int TMP_TABLE_UNIQUE_HASH_LENGTH = 4;
2070 uint alloc_length =
2071 ALIGN_SIZE(share->reclength + TMP_TABLE_UNIQUE_HASH_LENGTH + 1);
2072 share->rec_buff_length = alloc_length;
2073 /*
2074 Note that code in open_table_from_share() relies on the fact that
2075 for optimizer-created temporary tables TABLE_SHARE::default_values
2076 is allocated in a single chuck with TABLE::record[0] for the first
2077 TABLE instance.
2078 */
2079 if (!(table->record[0] = (uchar *)share->mem_root.Alloc(
2080 (alloc_length * 3 + share->null_bytes))))
2081 return true;
2082 table->record[1] = table->record[0] + alloc_length;
2083 share->default_values = table->record[1] + alloc_length;
2084 table->null_flags_saved = share->default_values + alloc_length;
2085 if (share->null_bytes) {
2086 table->null_flags = table->record[0];
2087 memset(table->record[0], 255, share->null_bytes); // Set null fields
2088 }
2089
2090 if (thd->variables.tmp_table_size == ~(ulonglong)0) // No limit
2091 share->max_rows = ~(ha_rows)0;
2092 else
2093 share->max_rows = (ha_rows)(((share->db_type() == heap_hton)
2094 ? min(thd->variables.tmp_table_size,
2095 thd->variables.max_heap_table_size)
2096 : thd->variables.tmp_table_size) /
2097 share->reclength);
2098 share->max_rows =
2099 std::max(share->max_rows, ha_rows(1)); // For dummy start options
2100
2101 return false;
2102 }
2103
open_tmp_table(TABLE * table)2104 bool open_tmp_table(TABLE *table) {
2105 DBUG_ASSERT(table->s->ref_count() == 1 || // not shared, or:
2106 table->s->db_type() == heap_hton || // using right engines
2107 table->s->db_type() == temptable_hton ||
2108 table->s->db_type() == innodb_hton);
2109
2110 int error;
2111 if ((error = table->file->ha_open(table, table->s->table_name.str, O_RDWR,
2112 HA_OPEN_TMP_TABLE | HA_OPEN_INTERNAL_TABLE,
2113 nullptr))) {
2114 table->file->print_error(error, MYF(0)); /* purecov: inspected */
2115 table->db_stat = 0;
2116 return (true);
2117 }
2118 (void)table->file->ha_extra(HA_EXTRA_QUICK); /* Faster */
2119
2120 table->set_created();
2121 table->s->tmp_handler_count++;
2122 return false;
2123 }
2124
2125 /**
2126 Try to create an in-memory temporary table and if not enough space, then
2127 try to create an on-disk one.
2128
2129 Create a temporary table according to passed description.
2130
  The passed array of MI_COLUMNDEF structures must have this form:
2132
2133 1. 1-byte column (afaiu for 'deleted' flag) (note maybe not 1-byte
2134 when there are many nullable columns)
2135 2. Table columns
2136 3. One free MI_COLUMNDEF element (*recinfo points here)
2137
2138 This function may use the free element to create hash column for unique
2139 constraint.
2140
2141 @param[in,out] table Table object that describes the table to be created
2142
2143 @retval false OK
2144 @retval true Error
2145 */
create_tmp_table_with_fallback(TABLE * table)2146 static bool create_tmp_table_with_fallback(TABLE *table) {
2147 TABLE_SHARE *share = table->s;
2148
2149 DBUG_TRACE;
2150
2151 HA_CREATE_INFO create_info;
2152
2153 create_info.db_type = table->s->db_type();
2154 create_info.row_type = table->s->row_type;
2155 create_info.options |=
2156 HA_LEX_CREATE_TMP_TABLE | HA_LEX_CREATE_INTERNAL_TMP_TABLE;
2157
2158 /*
2159 INNODB's fixed length column size is restricted to 1024. Exceeding this can
2160 result in incorrect behavior.
2161 */
2162 if (table->s->db_type() == innodb_hton) {
2163 for (Field **field = table->field; *field; ++field) {
2164 if ((*field)->type() == MYSQL_TYPE_STRING &&
2165 (*field)->key_length() > 1024) {
2166 my_error(ER_TOO_LONG_KEY, MYF(0), 1024);
2167 return true;
2168 }
2169 }
2170 }
2171
2172 int error =
2173 table->file->create(share->table_name.str, table, &create_info, nullptr);
2174 if (error == HA_ERR_RECORD_FILE_FULL &&
2175 table->s->db_type() == temptable_hton) {
2176 table->file =
2177 get_new_handler(table->s, false, &table->s->mem_root, innodb_hton);
2178 error = table->file->create(share->table_name.str, table, &create_info,
2179 nullptr);
2180 }
2181
2182 if (error) {
2183 table->file->print_error(error, MYF(0)); /* purecov: inspected */
2184 table->db_stat = 0;
2185 return true;
2186 } else {
2187 if (table->s->db_type() != temptable_hton) {
2188 table->in_use->inc_status_created_tmp_disk_tables();
2189 }
2190 return false;
2191 }
2192 }
2193
trace_tmp_table(Opt_trace_context * trace,const TABLE * table)2194 static void trace_tmp_table(Opt_trace_context *trace, const TABLE *table) {
2195 TABLE_SHARE *s = table->s;
2196 Opt_trace_object trace_tmp(trace, "tmp_table_info");
2197 if (strlen(table->alias) != 0)
2198 trace_tmp.add_utf8_table(table->pos_in_table_list);
2199 else
2200 trace_tmp.add_alnum("table", "intermediate_tmp_table");
2201 QEP_TAB *tab = table->reginfo.qep_tab;
2202 if (tab != nullptr && tab->join() != nullptr)
2203 trace_tmp.add("in_plan_at_position", tab->idx());
2204 trace_tmp.add("columns", s->fields)
2205 .add("row_length", s->reclength)
2206 .add("key_length", table->key_info ? table->key_info->key_length : 0)
2207 .add("unique_constraint", table->hash_field ? true : false)
2208 .add("makes_grouped_rows", table->group != nullptr)
2209 .add("cannot_insert_duplicates", table->is_distinct);
2210
2211 if (s->db_type() == innodb_hton) {
2212 trace_tmp.add_alnum("location", "disk (InnoDB)");
2213 if (s->db_create_options & HA_OPTION_PACK_RECORD)
2214 trace_tmp.add_alnum("record_format", "packed");
2215 else
2216 trace_tmp.add_alnum("record_format", "fixed");
2217 } else if (table->s->db_type() == temptable_hton) {
2218 trace_tmp.add_alnum("location", "TempTable");
2219 } else {
2220 DBUG_ASSERT(s->db_type() == heap_hton);
2221 trace_tmp.add_alnum("location", "memory (heap)")
2222 .add("row_limit_estimate", s->max_rows);
2223 }
2224 }
2225
2226 /**
2227 @brief
2228 Instantiates temporary table
2229
2230 @param thd Thread handler
2231 @param table Table object that describes the table to be
2232 instantiated
2233
2234 @details
2235 Creates tmp table and opens it.
2236
2237 @return
2238 false - OK
2239 true - Error
2240 */
2241
bool instantiate_tmp_table(THD *thd, TABLE *table) {
  TABLE_SHARE *const share = table->s;
#ifndef DBUG_OFF
  // Internal tmp tables must contain only base columns, never generated ones.
  for (uint i = 0; i < share->fields; i++)
    DBUG_ASSERT(table->field[i]->gcol_info == nullptr &&
                table->field[i]->stored_in_db);
#endif
  thd->inc_status_created_tmp_tables();

  // TempTable and InnoDB need an explicit create step before opening.
  if (share->db_type() == temptable_hton) {
    if (create_tmp_table_with_fallback(table)) return true;
  } else if (share->db_type() == innodb_hton) {
    if (create_tmp_table_with_fallback(table)) return true;
    // Make empty record so random data is not written to disk
    empty_record(table);
  }

  // If a heap table, it's created by open_tmp_table().
  if (open_tmp_table(table)) {
    /*
      Delete table immediately if we fail to open it, so
      TABLE::is_created() also implies that table is open.
    */
    table->file->ha_delete_table(share->table_name.str,
                                 nullptr); /* purecov: inspected */
    return true;
  }

  if (share->first_unused_tmp_key < share->keys) {
    /*
      Some other clone of this materialized temporary table has defined
      "possible" keys; as we are here creating the table in the engine, we must
      decide here what to do with them: drop them now, or make them "existing"
      now. As the other clone assumes they will be available if the Optimizer
      chooses them, we make them existing.
    */
    share->find_first_unused_tmp_key(Key_map(share->keys));
  }

  // Record the table's properties in the optimizer trace, if active.
  Opt_trace_context *const trace = &thd->opt_trace;
  if (unlikely(trace->is_started())) {
    Opt_trace_object wrapper(trace);
    Opt_trace_object convert(trace, "creating_tmp_table");
    trace_tmp_table(trace, table);
  }
  return false;
}
2289
2290 /**
2291 Free TABLE object and release associated resources for
2292 internal temporary table.
2293 */
void free_tmp_table(THD *thd, TABLE *entry) {
  const char *save_proc_info;
  DBUG_TRACE;
  DBUG_PRINT("enter", ("table: %s", entry->alias));

  // Remember the current stage so it can be restored before returning.
  save_proc_info = thd->proc_info;
  THD_STAGE_INFO(thd, stage_removing_tmp_table);

  // Release any filesort buffers still attached to this table.
  filesort_free_buffers(entry, true);

  DBUG_ASSERT(entry->s->tmp_handler_count <= entry->s->ref_count());

  if (entry->is_created()) {
    DBUG_ASSERT(entry->s->tmp_handler_count >= 1);
    // Table is marked as created only if was successfully opened.
    if (--entry->s->tmp_handler_count)
      entry->file->ha_close();
    else  // no more open 'handler' objects
      entry->file->ha_drop_table(entry->s->table_name.str);
    entry->set_deleted();
  }

  destroy(entry->file);
  entry->file = nullptr;

  /* free blobs */
  for (Field **ptr = entry->field; *ptr; ptr++) (*ptr)->mem_free();
  free_io_cache(entry);

  DBUG_ASSERT(entry->mem_root.allocated_size() == 0);

  DBUG_ASSERT(entry->s->ref_count() >= 1);
  if (entry->s->decrement_ref_count() == 0)  // no more TABLE objects
  {
    plugin_unlock(nullptr, entry->s->db_plugin);
    /*
      In create_tmp_table(), the share's memroot is allocated inside own_root
      and is then made a copy of own_root, so it is inside its memory blocks,
      so as soon as we free a memory block the memroot becomes unreadable.
      So we need a copy to free it.
    */
    MEM_ROOT own_root = std::move(entry->s->mem_root);
    destroy(entry);
    free_root(&own_root, MYF(0));
  }

  thd_proc_info(thd, save_proc_info);
}
2342
2343 /**
2344 If a MEMORY table gets full, create a disk-based table and copy all rows
2345 to this.
2346
2347 @param thd THD reference
2348 @param wtable Table reference being written to
2349 @param error Reason why inserting into MEMORY table failed.
2350 @param ignore_last_dup If true, ignore duplicate key error for last
2351 inserted key (see detailed description below).
2352 @param [out] is_duplicate if non-NULL and ignore_last_dup is true,
2353 return true if last key was a duplicate,
2354 and false otherwise.
2355
2356 @details
2357 Function can be called with any error code, but only HA_ERR_RECORD_FILE_FULL
2358 will be handled, all other errors cause a fatal error to be thrown.
2359 The function creates a disk-based temporary table, copies all records
2360 from the MEMORY table into this new table, deletes the old table and
2361 switches to use the new table within the table handle.
2362 The function uses table->record[1] as a temporary buffer while copying.
2363
2364 The function assumes that table->record[0] contains the row that caused
2365 the error when inserting into the MEMORY table (the "last row").
2366 After all existing rows have been copied to the new table, the last row
2367 is attempted to be inserted as well. If ignore_last_dup is true,
2368 this row can be a duplicate of an existing row without throwing an error.
2369 If is_duplicate is non-NULL, an indication of whether the last row was
2370 a duplicate is returned.
2371
2372 @note that any index/scan access initialized on the MEMORY 'wtable' is not
2373 replicated to the on-disk table - it's the caller's responsibility.
2374 However, access initialized on other TABLEs, is replicated.
2375
2376 If 'wtable' has other TABLE clones (example: a multi-referenced or a
2377 recursive CTE), we convert all clones; if an error happens during conversion
2378 of clone B after successfully converting clone A, clone A and B will exit
2379 from the function with a TABLE_SHARE corresponding to the pre-conversion
2380 table ("old" TABLE_SHARE). So A will be inconsistent (for example
2381 s->db_type() will say "MEMORY" while A->file will be a disk-based engine).
2382 However, as all callers bail out, it is reasonable to think that they won't
2383 be using the TABLE_SHARE except in free_tmp_table(); and free_tmp_table()
2384 only uses properties of TABLE_SHARE which are common to the old and new
2385 object (reference counts, MEM_ROOT), so that should work.
2386 Solutions to fix this cleanly:
2387 - allocate new TABLE_SHARE on heap instead of on stack, to be able to
2388 exit with two TABLE_SHAREs (drawback: more heap memory consumption, and need
2389 to verify all exit paths are safe),
2390 - close all TABLEs if error (but then callers and cleanup code may be
2391 surprised to find already-closed tables so they would need fixing).
2392 To lower the risk of error between A and B: we expect most errors will
2393 happen when copying rows (e.g. read or write errors); so we convert 'wtable'
2394 (which does the row copying) first; if it fails, the A-B situation is
2395 avoided and we can properly exit with the old TABLE_SHARE.
2396
2397 @returns true if error.
2398 */
2399
create_ondisk_from_heap(THD * thd,TABLE * wtable,int error,bool ignore_last_dup,bool * is_duplicate)2400 bool create_ondisk_from_heap(THD *thd, TABLE *wtable, int error,
2401 bool ignore_last_dup, bool *is_duplicate) {
2402 int write_err = 0;
2403 #ifndef DBUG_OFF
2404 const uint initial_handler_count = wtable->s->tmp_handler_count;
2405 bool rows_on_disk = false;
2406 #endif
2407 bool table_on_disk = false;
2408 DBUG_TRACE;
2409
2410 if (error != HA_ERR_RECORD_FILE_FULL) {
2411 /*
2412 We don't want this error to be converted to a warning, e.g. in case of
2413 INSERT IGNORE ... SELECT.
2414 */
2415 wtable->file->print_error(error, MYF(ME_FATALERROR));
2416 return true;
2417 }
2418
2419 if (wtable->s->db_type() != heap_hton) {
2420 if (wtable->s->db_type() != temptable_hton || temptable_use_mmap) {
2421 /* Do not convert in-memory temporary tables to on-disk
2422 temporary tables if the storage engine is anything other
2423 than the temptable engine or if the user has set the variable
2424 temptable_use_mmap to true to use mmap'ed files for temporary
2425 tables. */
2426 wtable->file->print_error(error, MYF(ME_FATALERROR));
2427 return true;
2428 }
2429
2430 /* If we are here, then the in-memory temporary tables need
2431 to be converted into on-disk temporary tables */
2432 }
2433
2434 const char *save_proc_info = thd->proc_info;
2435 THD_STAGE_INFO(thd, stage_converting_heap_to_ondisk);
2436
2437 TABLE_SHARE *const old_share = wtable->s;
2438 const plugin_ref old_plugin = old_share->db_plugin;
2439 TABLE_SHARE share = std::move(*old_share);
2440 DBUG_ASSERT(share.ha_share == nullptr);
2441
2442 share.db_plugin = ha_lock_engine(thd, innodb_hton);
2443
2444 TABLE_LIST *const wtable_list = wtable->pos_in_table_list;
2445 Derived_refs_iterator ref_it(wtable_list);
2446
2447 if (wtable_list) {
2448 Common_table_expr *cte = wtable_list->common_table_expr();
2449 if (cte) {
2450 int i = 0, found = -1;
2451 TABLE *t;
2452 while ((t = ref_it.get_next())) {
2453 if (t == wtable) {
2454 found = i;
2455 break;
2456 }
2457 ++i;
2458 }
2459 DBUG_ASSERT(found >= 0);
2460 if (found > 0)
2461 // 'wtable' is at position 'found', move it to 0 to convert it first
2462 std::swap(cte->tmp_tables[0], cte->tmp_tables[found]);
2463 ref_it.rewind();
2464 }
2465 }
2466
2467 TABLE new_table, *table = nullptr;
2468
2469 while (true) {
2470 if (wtable_list) // Possibly there are clones
2471 {
2472 table = ref_it.get_next();
2473 if (table == nullptr) break;
2474 } else // No clones
2475 {
2476 if (table == wtable) // Already processed
2477 break;
2478 table = wtable;
2479 }
2480
2481 table->mem_root.Clear();
2482
2483 // Set up a partial copy of the table.
2484 new_table.record[0] = table->record[0];
2485 new_table.record[1] = table->record[1];
2486 new_table.field = table->field;
2487 new_table.key_info = table->key_info;
2488 new_table.in_use = table->in_use;
2489 new_table.db_stat = table->db_stat;
2490 new_table.key_info = table->key_info;
2491 new_table.hash_field = table->hash_field;
2492 new_table.group = table->group;
2493 new_table.is_distinct = table->is_distinct;
2494 new_table.alias = table->alias;
2495 new_table.pos_in_table_list = table->pos_in_table_list;
2496 new_table.reginfo = table->reginfo;
2497 new_table.read_set = table->read_set;
2498 new_table.write_set = table->write_set;
2499
2500 new_table.s = &share; // New table points to new share
2501
2502 if (!(new_table.file = get_new_handler(
2503 &share, false, &new_table.s->mem_root, new_table.s->db_type())))
2504 goto err_after_proc_info; /* purecov: inspected */
2505 if (new_table.file->set_ha_share_ref(&share.ha_share))
2506 goto err_after_alloc; /* purecov: inspected */
2507
2508 /* Fix row type which might have changed with SE change. */
2509 set_real_row_type(&new_table);
2510
2511 if (!table_on_disk) {
2512 if (create_tmp_table_with_fallback(&new_table))
2513 goto err_after_alloc; /* purecov: inspected */
2514
2515 table_on_disk = true;
2516 }
2517
2518 bool rec_ref_w_open_cursor = false, psi_batch_started = false;
2519
2520 if (table->is_created()) {
2521 // Close it, drop it, and open a new one in the disk-based engine.
2522
2523 if (open_tmp_table(&new_table))
2524 goto err_after_create; /* purecov: inspected */
2525
2526 if (table->file->indexes_are_disabled())
2527 new_table.file->ha_disable_indexes(HA_KEY_SWITCH_ALL);
2528
2529 if (table == wtable) {
2530 // The table receiving writes; migrate rows before closing/dropping.
2531
2532 if (unlikely(thd->opt_trace.is_started())) {
2533 Opt_trace_context *trace = &thd->opt_trace;
2534 Opt_trace_object wrapper(trace);
2535 Opt_trace_object convert(trace, "converting_tmp_table_to_ondisk");
2536 DBUG_ASSERT(error == HA_ERR_RECORD_FILE_FULL);
2537 convert.add_alnum("cause", "memory_table_size_exceeded");
2538 trace_tmp_table(trace, &new_table);
2539 }
2540
2541 table->file->ha_index_or_rnd_end();
2542
2543 if ((write_err = table->file->ha_rnd_init(true))) {
2544 /* purecov: begin inspected */
2545 table->file->print_error(write_err, MYF(ME_FATALERROR));
2546 write_err = 0;
2547 goto err_after_open;
2548 /* purecov: end */
2549 }
2550
2551 if (table->no_rows) {
2552 new_table.file->ha_extra(HA_EXTRA_NO_ROWS);
2553 new_table.no_rows = true;
2554 }
2555
2556 /*
2557 copy all old rows from heap table to on-disk table
2558 This is the only code that uses record[1] to read/write but this
2559 is safe as this is a temporary on-disk table without timestamp/
2560 autoincrement or partitioning.
2561 */
2562 while (!table->file->ha_rnd_next(new_table.record[1])) {
2563 write_err = new_table.file->ha_write_row(new_table.record[1]);
2564 DBUG_EXECUTE_IF("raise_error", write_err = HA_ERR_FOUND_DUPP_KEY;);
2565 if (write_err) goto err_after_open;
2566 }
2567 /* copy row that filled HEAP table */
2568 if ((write_err = new_table.file->ha_write_row(table->record[0]))) {
2569 if (!new_table.file->is_ignorable_error(write_err) ||
2570 !ignore_last_dup)
2571 goto err_after_open;
2572 if (is_duplicate) *is_duplicate = true;
2573 } else {
2574 if (is_duplicate) *is_duplicate = false;
2575 }
2576
2577 (void)table->file->ha_rnd_end();
2578 #ifndef DBUG_OFF
2579 rows_on_disk = true;
2580 #endif
2581 }
2582
2583 /* remove heap table and change to use on-disk table */
2584
2585 // TODO(sgunders): Move this into MaterializeIterator when we remove the
2586 // pre-iterator executor.
2587 if (table->pos_in_table_list &&
2588 table->pos_in_table_list->is_recursive_reference() &&
2589 table->file->inited) {
2590 /*
2591 Due to the last condition, this is guaranteed to be a recursive
2592 reference belonging to the unit which 'wtable' materializes, and not
2593 to the unit of another non-recursive reference (indeed, this other
2594 reference will re-use the rows of 'wtable', i.e. not execute its
2595 unit).
2596 This reference has opened a cursor.
2597 In the 'tmp_tables' list, 'wtable' is always before such recursive
2598 reference, as setup_materialized_derived_tmp_table() runs before
2599 substitute_recursive_reference(). So, we know the disk-based rows
2600 already exist at this point.
2601 */
2602 DBUG_ASSERT(rows_on_disk);
2603 (void)table->file->ha_rnd_end();
2604 rec_ref_w_open_cursor = true;
2605 }
2606
2607 psi_batch_started = table->file->end_psi_batch_mode_if_started();
2608
2609 // Close the in-memory table
2610 if (table->s->db_type() == temptable_hton) {
2611 /* Drop the in-memory temptable.
2612 This code can execute only if mmap'ed temporary
2613 files were disabled using temptable_use_mmap variable */
2614 DBUG_ASSERT(temptable_use_mmap == false);
2615 table->file->ha_drop_table(table->s->table_name.str);
2616 } else {
2617 // Closing the MEMORY table drops it if its ref count is down to zero
2618 (void)table->file->ha_close();
2619 }
2620 share.tmp_handler_count--;
2621 }
2622
2623 /*
2624 Replace the guts of the old table with the new one, although keeping
2625 most members.
2626 */
2627 destroy(table->file);
2628 table->s = new_table.s;
2629 table->file = new_table.file;
2630 table->db_stat = new_table.db_stat;
2631 table->in_use = new_table.in_use;
2632 table->no_rows = new_table.no_rows;
2633 table->record[0] = new_table.record[0];
2634 table->record[1] = new_table.record[1];
2635 table->mem_root = std::move(new_table.mem_root);
2636
2637 /*
2638 Depending on if this TABLE clone is early/late in optimization, or in
2639 execution, it has a JOIN_TAB or a QEP_TAB or none.
2640 */
2641 QEP_TAB *qep_tab = table->reginfo.qep_tab;
2642 QEP_shared_owner *tab;
2643 if (qep_tab)
2644 tab = qep_tab;
2645 else
2646 tab = table->reginfo.join_tab;
2647
2648 /* Update quick select, if any. */
2649 if (tab && tab->quick()) {
2650 DBUG_ASSERT(table->pos_in_table_list->uses_materialization());
2651 tab->quick()->set_handler(table->file);
2652 }
2653
2654 // TODO(sgunders): Move this into MaterializeIterator when we remove the
2655 // pre-iterator executor.
2656 if (rec_ref_w_open_cursor) {
2657 /*
2658 The table just changed from MEMORY to INNODB. 'table' is a reader and
2659 had an open cursor to the MEMORY table. We closed the cursor, now need
2660 to open it to InnoDB and re-position it at the same row as before.
2661 Row positions (returned by handler::position()) are different in
2662 MEMORY and InnoDB - so the MEMORY row and InnoDB row have differing
2663 positions.
2664 We had read N rows of the MEMORY table, need to re-position our
2665 cursor after the same N rows in the InnoDB table.
2666 */
2667 if (psi_batch_started) table->file->start_psi_batch_mode();
2668 }
2669
2670 // Point 'table' back to old_share; *old_share will be updated after loop.
2671 table->s = old_share;
2672 /*
2673 Update share-dependent pointers cached in 'table->file' and in
2674 read_set/write_set.
2675 */
2676 table->file->change_table_ptr(table, table->s);
2677 table->file->set_ha_share_ref(&table->s->ha_share);
2678 table->use_all_columns();
2679
2680 } // End of tables-processing loop
2681
2682 plugin_unlock(nullptr, old_plugin);
2683 share.db_plugin = my_plugin_lock(nullptr, &share.db_plugin);
2684 *old_share = std::move(share);
2685
2686 /*
2687 Now old_share is new, and all TABLEs in Derived_refs_iterator point to
2688 it, and so do their table->file: everything is consistent.
2689 */
2690
2691 DBUG_ASSERT(initial_handler_count == wtable->s->tmp_handler_count);
2692
2693 if (save_proc_info)
2694 thd_proc_info(thd, (!strcmp(save_proc_info, "Copying to tmp table")
2695 ? "Copying to tmp table on disk"
2696 : save_proc_info));
2697 return false;
2698
2699 err_after_open:
2700 if (write_err) {
2701 DBUG_PRINT("error", ("Got error: %d", write_err));
2702 new_table.file->print_error(write_err, MYF(0));
2703 }
2704 if (table->file->inited) (void)table->file->ha_rnd_end();
2705 (void)new_table.file->ha_close();
2706 err_after_create:
2707 new_table.file->ha_delete_table(new_table.s->table_name.str, nullptr);
2708 err_after_alloc:
2709 destroy(new_table.file);
2710 err_after_proc_info:
2711 thd_proc_info(thd, save_proc_info);
2712 // New share took control of old share mem_root; regain control:
2713 old_share->mem_root = std::move(share.mem_root);
2714 return true;
2715 }
2716
2717 /**
2718 Encode an InnoDB PK in 6 bytes, high-byte first; like
2719 InnoDB's dict_sys_write_row_id() does.
2720 @param rowid_bytes where to store the result
2721 @param length how many available bytes in rowid_bytes
2722 @param row_num PK to encode
2723 */
encode_innodb_position(uchar * rowid_bytes,uint length MY_ATTRIBUTE ((unused)),ha_rows row_num)2724 void encode_innodb_position(uchar *rowid_bytes,
2725 uint length MY_ATTRIBUTE((unused)),
2726 ha_rows row_num) {
2727 DBUG_ASSERT(length == 6);
2728 for (int i = 0; i < 6; i++)
2729 rowid_bytes[i] = (uchar)(row_num >> ((5 - i) * 8));
2730 }
2731
2732 /**
2733 Helper function for create_ondisk_from_heap().
2734
2735 Our InnoDB on-disk intrinsic table uses an autogenerated
2736 auto-incrementing primary key:
2737 - first inserted row has pk=1 (see
2738 dict_table_get_next_table_sess_row_id()), second has pk=2, etc
2739 - ha_rnd_next uses a PK index scan so returns rows in PK order
2740 - position() returns the PK
2741 - ha_rnd_pos() takes the PK in input.
2742
2743 @param table table read by cursor
2744 @param row_num function should position on the row_num'th row in insertion
2745 order.
2746 */
reposition_innodb_cursor(TABLE * table,ha_rows row_num)2747 bool reposition_innodb_cursor(TABLE *table, ha_rows row_num) {
2748 DBUG_ASSERT(table->s->db_type() == innodb_hton);
2749 if (table->file->ha_rnd_init(false)) return true; /* purecov: inspected */
2750 // Per the explanation above, the wanted InnoDB row has PK=row_num.
2751 uchar rowid_bytes[6];
2752 encode_innodb_position(rowid_bytes, sizeof(rowid_bytes), row_num);
2753 /*
2754 Go to the row, and discard the row. That places the cursor at
2755 the same row as before the engine conversion, so that rnd_next() will
2756 read the (row_num+1)th row.
2757 */
2758 return table->file->ha_rnd_pos(table->record[0], rowid_bytes);
2759 }
2760