1 /* Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
22 
23 /**
24   @file sql/sql_tmp_table.cc
25   Temporary tables implementation.
26 */
27 
28 #include "sql/sql_tmp_table.h"
29 
30 #include <fcntl.h>
31 #include <stddef.h>
32 #include <stdio.h>
33 #include <algorithm>
34 #include <cstring>
35 #include <new>
36 #include <utility>
37 #include <vector>
38 
39 #include "field_types.h"
40 #include "lex_string.h"
41 #include "m_ctype.h"
42 #include "m_string.h"
43 #include "my_alloc.h"
44 #include "my_bitmap.h"
45 #include "my_compiler.h"
46 #include "my_dbug.h"
47 #include "my_pointer_arithmetic.h"
48 #include "my_sys.h"
49 #include "mysql/plugin.h"
50 #include "mysql/udf_registration_types.h"
51 #include "mysql_com.h"
52 #include "mysqld_error.h"
53 #include "scope_guard.h"
54 #include "sql/create_field.h"
55 #include "sql/current_thd.h"
56 #include "sql/dd/types/column.h"
57 #include "sql/debug_sync.h"  // DEBUG_SYNC
58 #include "sql/field.h"
59 #include "sql/filesort.h"  // filesort_free_buffers
60 #include "sql/handler.h"
61 #include "sql/item_func.h"  // Item_func
62 #include "sql/item_sum.h"   // Item_sum
63 #include "sql/key.h"
64 #include "sql/mem_root_allocator.h"
65 #include "sql/mem_root_array.h"     // Mem_root_array
66 #include "sql/mysqld.h"             // heap_hton
67 #include "sql/opt_range.h"          // QUICK_SELECT_I
68 #include "sql/opt_trace.h"          // Opt_trace_object
69 #include "sql/opt_trace_context.h"  // Opt_trace_context
70 #include "sql/psi_memory_key.h"
71 #include "sql/query_options.h"
72 #include "sql/sql_base.h"   // free_io_cache
73 #include "sql/sql_class.h"  // THD
74 #include "sql/sql_const.h"
75 #include "sql/sql_executor.h"  // SJ_TMP_TABLE
76 #include "sql/sql_lex.h"
77 #include "sql/sql_list.h"
78 #include "sql/sql_opt_exec_shared.h"
79 #include "sql/sql_plugin.h"  // plugin_unlock
80 #include "sql/sql_plugin_ref.h"
81 #include "sql/sql_select.h"
82 #include "sql/system_variables.h"
83 #include "sql/table.h"
84 #include "sql/temp_table_param.h"
85 #include "sql/thd_raii.h"
86 #include "sql/thr_malloc.h"
87 #include "sql/window.h"
88 #include "template_utils.h"
89 
90 using std::max;
91 using std::min;
// Forward declarations of file-local (static) helpers. Their definitions are
// not visible in this part of the file.
static bool setup_tmp_table_handler(TABLE *table, ulonglong select_options,
                                    bool force_disk_table, bool schema_table);
static bool alloc_record_buffers(TABLE *table);
95 
96 /****************************************************************************
97   Create internal temporary table
98 ****************************************************************************/
99 
100 /**
101   Create field for temporary table from given field.
102 
103   @param thd	      Thread handler
104   @param org_field    Field from which new field will be created
105   @param name         New field name
106   @param table	      Temporary table
107   @param item	      If item != NULL then fill_record() will update
108                       the record in the original table.
109                       If item == NULL then fill_record() will update
110                       the temporary table
111 
112   @retval
113     NULL		on error
114   @retval
115     new_created field
116 */
117 
create_tmp_field_from_field(THD * thd,const Field * org_field,const char * name,TABLE * table,Item_field * item)118 Field *create_tmp_field_from_field(THD *thd, const Field *org_field,
119                                    const char *name, TABLE *table,
120                                    Item_field *item) {
121   Field *new_field = org_field->new_field(thd->mem_root, table);
122   if (new_field == nullptr) return nullptr;
123 
124   new_field->init(table);
125   new_field->orig_table = org_field->table;
126   new_field->field_name = name;
127   if (org_field->is_flag_set(NO_DEFAULT_VALUE_FLAG))
128     new_field->set_flag(NO_DEFAULT_VALUE_FLAG);
129   if (org_field->is_nullable() || org_field->table->is_nullable() ||
130       (item && item->maybe_null))
131     new_field->clear_flag(NOT_NULL_FLAG);  // Because of outer join
132   if (org_field->type() == FIELD_TYPE_DOUBLE)
133     down_cast<Field_double *>(new_field)->not_fixed = true;
134   /*
135     This field will belong to an internal temporary table, it cannot be
136     generated.
137   */
138   new_field->gcol_info = nullptr;
139   new_field->stored_in_db = true;
140   return new_field;
141 }
142 
143 /**
144   Create field for temporary table using type of given item.
145 
146   @param item                  Item to create a field for
147   @param table                 Temporary table
148 
149   @retval
150     0  on error
151   @retval
152     new_created field
153 */
154 
create_tmp_field_from_item(Item * item,TABLE * table)155 static Field *create_tmp_field_from_item(Item *item, TABLE *table) {
156   bool maybe_null = item->maybe_null;
157   Field *new_field = nullptr;
158 
159   switch (item->result_type()) {
160     case REAL_RESULT:
161       new_field = new (*THR_MALLOC)
162           Field_double(item->max_length, maybe_null, item->item_name.ptr(),
163                        item->decimals, false, true);
164       break;
165     case INT_RESULT:
166       /*
167         Select an integer type with the minimal fit precision.
168         MY_INT32_NUM_DECIMAL_DIGITS is sign inclusive, don't consider the sign.
169         Values with MY_INT32_NUM_DECIMAL_DIGITS digits may or may not fit into
170         Field_long : make them Field_longlong.
171       */
172       if (item->max_length >= (MY_INT32_NUM_DECIMAL_DIGITS - 1))
173         new_field = new (*THR_MALLOC)
174             Field_longlong(item->max_length, maybe_null, item->item_name.ptr(),
175                            item->unsigned_flag);
176       else
177         new_field = new (*THR_MALLOC)
178             Field_long(item->max_length, maybe_null, item->item_name.ptr(),
179                        item->unsigned_flag);
180       break;
181     case STRING_RESULT:
182       DBUG_ASSERT(item->collation.collation);
183 
184       /*
185         DATE/TIME, GEOMETRY and JSON fields have STRING_RESULT result type.
186         To preserve type they needed to be handled separately.
187       */
188       if (item->is_temporal() || item->data_type() == MYSQL_TYPE_GEOMETRY ||
189           item->data_type() == MYSQL_TYPE_JSON) {
190         new_field = item->tmp_table_field_from_field_type(table, true);
191       } else {
192         new_field = item->make_string_field(table);
193       }
194       new_field->set_derivation(item->collation.derivation);
195       break;
196     case DECIMAL_RESULT:
197       new_field = Field_new_decimal::create_from_item(item);
198       break;
199     case ROW_RESULT:
200     default:
201       // This case should never be choosen
202       DBUG_ASSERT(0);
203       new_field = nullptr;
204       break;
205   }
206   if (new_field == nullptr) return nullptr;
207 
208   new_field->init(table);
209 
210   if (item->type() == Item::NULL_ITEM)
211     new_field->is_created_from_null_item = true;
212   return new_field;
213 }
214 
215 /**
216   Create field for information schema table.
217 
218   @param table		Temporary table
219   @param item		Item to create a field for
220 
221   @retval
222     0			on error
223   @retval
224     new_created field
225 */
226 
create_tmp_field_for_schema(const Item * item,TABLE * table)227 static Field *create_tmp_field_for_schema(const Item *item, TABLE *table) {
228   if (item->data_type() == MYSQL_TYPE_VARCHAR) {
229     Field *field;
230     if (item->max_length > MAX_FIELD_VARCHARLENGTH)
231       field = new (*THR_MALLOC)
232           Field_blob(item->max_length, item->maybe_null, item->item_name.ptr(),
233                      item->collation.collation, false);
234     else {
235       field = new (*THR_MALLOC) Field_varstring(
236           item->max_length, item->maybe_null, item->item_name.ptr(), table->s,
237           item->collation.collation);
238       table->s->db_create_options |= HA_OPTION_PACK_RECORD;
239     }
240     if (field) field->init(table);
241     return field;
242   }
243   return item->tmp_table_field_from_field_type(table, false);
244 }
245 
246 /**
247   Create field for temporary table.
248 
249   @param thd		Thread handler
250   @param table		Temporary table
251   @param item		Item to create a field for
252   @param type		Type of item (normally item->type)
253   @param copy_func	If set and item is a function, store copy of item
254                        in this array
  @param from_field    If the field is created using another field as a
                       template, a pointer to that field is written here
257   @param default_field	If field has a default value field, store it here
258   @param group		1 if we are going to do a relative group by on result
  @param modify_item	1 if item->result_field should point to new item.
                       This is relevant for how fill_record() is going to
                       work:
262                        If modify_item is 1 then fill_record() will update
263                        the record in the original table.
264                        If modify_item is 0 then fill_record() will update
265                        the temporary table
266   @param table_cant_handle_bit_fields if table can't handle bit-fields and
267   bit-fields shall be converted to long @see
268   Temp_table_param::bit_fields_as_long
269   @param make_copy_field if true, a pointer of the result field should be stored
270   in from_field,  otherwise the item should be wrapped in Func_ptr and stored in
271   copy_func
272   @param copy_result_field true <=> save item's result_field in the from_field
273                        arg, before changing it. This is used for a window's
274                        OUT table when window uses frame buffer to copy a
275                        function's result field from OUT table to frame buffer
276                        (and back). @note that the goals of 'from_field' when
277                        this argument is true and when it is false, are
278                        different.
279 
280   @retval NULL On error.
281 
282   @retval new_created field
283 */
284 
Field *create_tmp_field(THD *thd, TABLE *table, Item *item, Item::Type type,
                        Func_ptr_array *copy_func, Field **from_field,
                        Field **default_field, bool group, bool modify_item,
                        bool table_cant_handle_bit_fields, bool make_copy_field,
                        bool copy_result_field) {
  // Dispatches on the item type to one of the specialised field factories
  // and records, depending on the flags, where the new column gets its
  // value from (from_field, default_field, copy_func).
  DBUG_TRACE;
  Field *result = nullptr;
  Item::Type orig_type = type;
  Item *orig_item = nullptr;

  /*
    An item that is not itself a field but resolves to one through
    real_item() is handled as that field. The wrapping item is remembered in
    orig_item, and its type in orig_type.
  */
  if (type != Item::FIELD_ITEM &&
      item->real_item()->type() == Item::FIELD_ITEM) {
    orig_item = item;
    item = item->real_item();
    type = Item::FIELD_ITEM;
  }

  // True only for a SUM_FUNC_ITEM whose real item is flagged as a window
  // function.
  bool is_wf =
      type == Item::SUM_FUNC_ITEM && item->real_item()->m_is_window_function;

  switch (type) {
    case Item::FIELD_ITEM:
    case Item::DEFAULT_VALUE_ITEM:
    case Item::TRIGGER_FIELD_ITEM: {
      Item_field *item_field = down_cast<Item_field *>(item);
      /*
        If the item has to be able to store NULLs but the underlying field
        cannot, create_tmp_field_from_field() can't be used for tmp field
        creation.
      */
      if (item_field->maybe_null &&
          !(item_field->field->is_nullable() ||
            item_field->field->table->is_nullable())) {
        result = create_tmp_field_from_item(item_field, table);
      } else if (table_cant_handle_bit_fields &&
                 item_field->field->type() == MYSQL_TYPE_BIT) {
        result = create_tmp_field_from_item(item_field, table);
        /*
          If the item is a function, a pointer to the item is stored in
          copy_func. We separate fields from functions by checking if the
          item is a result field item. The real_item() must be checked to
          avoid falsely identifying Item_ref and its subclasses as functions
          when they refer to field-like items, such as Item_copy and
          subclasses. References to true fields have already been untangled
          in the beginning of create_tmp_field().
         */
        if (item->real_item()->is_result_field())
          copy_func->push_back(Func_ptr(item));
      } else {
        // Regular case: clone the underlying field. The name of the wrapping
        // item, if there is one, takes precedence over the field item's name.
        result = create_tmp_field_from_field(
            thd, item_field->field,
            orig_item ? orig_item->item_name.ptr()
                      : item_field->item_name.ptr(),
            table,
            (modify_item && orig_type != Item::REF_ITEM) ? item_field
                                                         : nullptr);
      }
      if (result == nullptr) return nullptr;
      // When requested, point the item (or the reference that wrapped it) at
      // the newly created field.
      if (modify_item) {
        if (orig_type == Item::REF_ITEM)
          orig_item->set_result_field(result);
        else
          item_field->set_result_field(result);
      }
      /*
        Fields that are used as arguments to the DEFAULT() function already have
        their data pointers set to the default value during name resolution. See
        Item_default_value::fix_fields.
      */
      if (orig_type != Item::DEFAULT_VALUE_ITEM &&
          item_field->field->eq_def(result))
        *default_field = item_field->field;
      *from_field = item_field->field;
      break;
    }
    /*
      No fall-through here: the block above always leaves the switch via
      break or return.
    */
    case Item::FUNC_ITEM:
      // Stored functions (FUNC_SP) get a field modelled on the function's own
      // result field; every other function falls through to the generic
      // handling below.
      if (down_cast<Item_func *>(item)->functype() == Item_func::FUNC_SP) {
        Item_func_sp *item_func_sp = down_cast<Item_func_sp *>(item);
        Field *sp_result_field = item_func_sp->get_sp_result_field();

        if (make_copy_field) {
          DBUG_ASSERT(item_func_sp->get_result_field());
          *from_field = item_func_sp->get_result_field();
        } else {
          copy_func->push_back(Func_ptr(item));
        }

        result = create_tmp_field_from_field(thd, sp_result_field,
                                             item_func_sp->item_name.ptr(),
                                             table, nullptr);
        // On failure 'result' is nullptr, which is what gets returned.
        if (!result) break;
        if (modify_item) item_func_sp->set_result_field(result);
        break;
      }

      /* Fall through */
    case Item::COND_ITEM:
    case Item::FIELD_AVG_ITEM:
    case Item::FIELD_BIT_ITEM:
    case Item::FIELD_STD_ITEM:
    case Item::FIELD_VARIANCE_ITEM:
    case Item::SUBSELECT_ITEM:
      /* The following can only happen with 'CREATE TABLE ... SELECT' */
    case Item::PROC_ITEM:
    case Item::INT_ITEM:
    case Item::REAL_ITEM:
    case Item::DECIMAL_ITEM:
    case Item::STRING_ITEM:
    case Item::REF_ITEM:
    case Item::NULL_ITEM:
    case Item::VARBIN_ITEM:
    case Item::PARAM_ITEM:
    case Item::SUM_FUNC_ITEM:
      if (type == Item::SUM_FUNC_ITEM && !is_wf) {
        // Aggregate that is not a window function: the Item_sum creates its
        // own field. A failure is reported as ER_OUT_OF_RESOURCES.
        Item_sum *item_sum = down_cast<Item_sum *>(item);
        result = item_sum->create_tmp_field(group, table);
        if (!result) my_error(ER_OUT_OF_RESOURCES, MYF(ME_FATALERROR));
      } else {
        /*
          (2) we're windowing. The Item doesn't contain any not-yet-calculated
          window function (per logic in our caller create_tmp_table()). So it
          is an ordinary function or can be considered as such. We're creating
          the OUT table using IN table as source, and we have previously
          created a frame buffer (FB) using IN table as source. That previous
          creation has set IN's item's result_field to be the FB field. Here
          we save that FB field in from_field. Right after that,
          create_tmp_field_from_item() sets IN's item's result_field to the
          OUT field (which OUT field is the 'result' variable). We mark the
          OUT field with FIELD_IS_MARKED. Later we detect the mark, and create
          a Copy_field to from_field (FB) from the marked field (OUT). The end
          situation is: IN's item's result_field is in OUT, enabling the
          initial function evaluation and saving of its result in OUT; the
          Copy_field from OUT to FB and back will allow buffering/restoration
          of that result.
        */
        if (make_copy_field || (copy_result_field && !is_wf))  // (2)
        {
          *from_field = item->get_tmp_table_field();
          DBUG_ASSERT(*from_field);
        }

        result = create_tmp_field_from_item(item, table);
        if (result == nullptr) return nullptr;
        if (modify_item) item->set_result_field(result);
        // Register the item for evaluation through copy_func only if a
        // copy_func array was supplied and no copy field was requested.
        if (copy_func && !make_copy_field &&
            item->real_item()->is_result_field())
          copy_func->push_back(Func_ptr(item));
        if (copy_result_field) result->set_flag(FIELD_IS_MARKED);
      }
      break;
    case Item::TYPE_HOLDER:
    case Item::VALUES_COLUMN_ITEM:
      // The aggregated type itself knows which field to create.
      result = down_cast<Item_aggregate_type *>(item)->make_field_by_type(
          table, thd->is_strict_mode());
      break;
    default:  // Doesn't have to be stored
      DBUG_ASSERT(false);
      break;
  }
  return result;
}
446 
447 /*
448   Set up column usage bitmaps for a temporary table
449 
450   IMPLEMENTATION
451     For temporary tables, we need one bitmap with all columns set and
452     a tmp_set bitmap to be used by things like filesort.
453 */
454 
setup_tmp_table_column_bitmaps(TABLE * table,uchar * bitmaps)455 static void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps) {
456   uint field_count = table->s->fields;
457   bitmap_init(&table->def_read_set, (my_bitmap_map *)bitmaps, field_count);
458   bitmap_init(&table->tmp_set,
459               (my_bitmap_map *)(bitmaps + bitmap_buffer_size(field_count)),
460               field_count);
461   bitmap_init(&table->cond_set,
462               (my_bitmap_map *)(bitmaps + bitmap_buffer_size(field_count) * 2),
463               field_count);
464   /* write_set and all_set are copies of read_set */
465   table->def_write_set = table->def_read_set;
466   table->s->all_set = table->def_read_set;
467   bitmap_set_all(&table->s->all_set);
468   table->default_column_bitmaps();
469   table->s->column_bitmap_size = bitmap_buffer_size(field_count);
470 }
471 
472 /**
473   Cache for the storage engine properties for the alternative temporary table
474   storage engines. This cache is initialized during startup of the server by
  asking the storage engines for the property values.
476 */
477 
478 class Cache_temp_engine_properties {
479  public:
480   static uint HEAP_MAX_KEY_LENGTH;
481   static uint TEMPTABLE_MAX_KEY_LENGTH;
482   static uint INNODB_MAX_KEY_LENGTH;
483   static uint HEAP_MAX_KEY_PART_LENGTH;
484   static uint TEMPTABLE_MAX_KEY_PART_LENGTH;
485   static uint INNODB_MAX_KEY_PART_LENGTH;
486   static uint HEAP_MAX_KEY_PARTS;
487   static uint TEMPTABLE_MAX_KEY_PARTS;
488   static uint INNODB_MAX_KEY_PARTS;
489 
490   static void init(THD *thd);
491 };
492 
init(THD * thd)493 void Cache_temp_engine_properties::init(THD *thd) {
494   handler *handler;
495   plugin_ref db_plugin;
496 
497   // Cache HEAP engine's
498   db_plugin = ha_lock_engine(nullptr, heap_hton);
499   handler =
500       get_new_handler((TABLE_SHARE *)nullptr, false, thd->mem_root, heap_hton);
501   HEAP_MAX_KEY_LENGTH = handler->max_key_length();
502   HEAP_MAX_KEY_PART_LENGTH = handler->max_key_part_length(nullptr);
503   HEAP_MAX_KEY_PARTS = handler->max_key_parts();
504   destroy(handler);
505   plugin_unlock(nullptr, db_plugin);
506   // Cache TempTable engine's
507   db_plugin = ha_lock_engine(nullptr, temptable_hton);
508   handler = get_new_handler((TABLE_SHARE *)nullptr, false, thd->mem_root,
509                             temptable_hton);
510   TEMPTABLE_MAX_KEY_LENGTH = handler->max_key_length();
511   TEMPTABLE_MAX_KEY_PART_LENGTH = handler->max_key_part_length(nullptr);
512   TEMPTABLE_MAX_KEY_PARTS = handler->max_key_parts();
513   destroy(handler);
514   plugin_unlock(nullptr, db_plugin);
515   // Cache INNODB engine's
516   db_plugin = ha_lock_engine(nullptr, innodb_hton);
517   handler = get_new_handler((TABLE_SHARE *)nullptr, false, thd->mem_root,
518                             innodb_hton);
519   INNODB_MAX_KEY_LENGTH = handler->max_key_length();
520   /*
521     For ha_innobase::max_supported_key_part_length(), the returned value
522     is constant. However, in innodb itself, the limitation
523     on key_part length is up to the ROW_FORMAT. In current trunk, internal
524     temp table's ROW_FORMAT is DYNAMIC. In order to keep the consistence
525     between server and innodb, here we hard-coded 3072 as the maximum of
526     key_part length supported by innodb until bug#20629014 is fixed.
527 
528     TODO: Remove the hard-code here after bug#20629014 is fixed.
529   */
530   INNODB_MAX_KEY_PART_LENGTH = 3072;
531   INNODB_MAX_KEY_PARTS = handler->max_key_parts();
532   destroy(handler);
533   plugin_unlock(nullptr, db_plugin);
534 }
535 
536 uint Cache_temp_engine_properties::HEAP_MAX_KEY_LENGTH = 0;
537 uint Cache_temp_engine_properties::TEMPTABLE_MAX_KEY_LENGTH = 0;
538 uint Cache_temp_engine_properties::INNODB_MAX_KEY_LENGTH = 0;
539 uint Cache_temp_engine_properties::HEAP_MAX_KEY_PART_LENGTH = 0;
540 uint Cache_temp_engine_properties::TEMPTABLE_MAX_KEY_PART_LENGTH = 0;
541 uint Cache_temp_engine_properties::INNODB_MAX_KEY_PART_LENGTH = 0;
542 uint Cache_temp_engine_properties::HEAP_MAX_KEY_PARTS = 0;
543 uint Cache_temp_engine_properties::TEMPTABLE_MAX_KEY_PARTS = 0;
544 uint Cache_temp_engine_properties::INNODB_MAX_KEY_PARTS = 0;
545 
546 /**
547   Initialize the storage engine properties for the alternative temporary table
548   storage engines.
549 */
init_cache_tmp_engine_properties()550 void init_cache_tmp_engine_properties() {
551   DBUG_ASSERT(!current_thd);
552   THD *thd = new THD();
553   thd->thread_stack = pointer_cast<char *>(&thd);
554   thd->store_globals();
555   Cache_temp_engine_properties::init(thd);
556   delete thd;
557 }
558 
559 /**
560   Get the minimum of max_key_length/part_length/parts.
561   The minimum is between HEAP engine and internal_tmp_disk_storage_engine.
562 
563   @param[out] max_key_length Minimum of max_key_length
564   @param[out] max_key_part_length Minimum of max_key_part_length
565   @param[out] max_key_parts  Minimum of max_key_parts
566 */
567 
get_max_key_and_part_length(uint * max_key_length,uint * max_key_part_length,uint * max_key_parts)568 void get_max_key_and_part_length(uint *max_key_length,
569                                  uint *max_key_part_length,
570                                  uint *max_key_parts) {
571   // Make sure these cached properties are initialized.
572   DBUG_ASSERT(Cache_temp_engine_properties::HEAP_MAX_KEY_LENGTH);
573 
574   *max_key_length =
575       std::min(Cache_temp_engine_properties::HEAP_MAX_KEY_LENGTH,
576                Cache_temp_engine_properties::INNODB_MAX_KEY_LENGTH);
577   *max_key_part_length =
578       std::min(Cache_temp_engine_properties::HEAP_MAX_KEY_PART_LENGTH,
579                Cache_temp_engine_properties::INNODB_MAX_KEY_PART_LENGTH);
580   *max_key_parts = std::min(Cache_temp_engine_properties::HEAP_MAX_KEY_PARTS,
581                             Cache_temp_engine_properties::INNODB_MAX_KEY_PARTS);
582 }
583 
584 /**
585   Create a temporary name for one field if the field_name is empty.
586 
587   @param thd          Thread handle
588   @param item         Item to name the field after
589 */
590 
create_tmp_table_field_tmp_name(THD * thd,Item * item)591 static const char *create_tmp_table_field_tmp_name(THD *thd, Item *item) {
592   StringBuffer<STRING_BUFFER_USUAL_SIZE> field_name;
593   const ulonglong save_bits = thd->variables.option_bits;
594   thd->variables.option_bits &= ~OPTION_QUOTE_SHOW_CREATE;
595   item->print(
596       thd, &field_name,
597       enum_query_type(QT_NO_DEFAULT_DB | QT_SUBSELECT_AS_ONLY_SELECT_NUMBER));
598   thd->variables.option_bits = save_bits;
599   return thd->mem_strdup(field_name.c_ptr_safe());
600 }
601 
602 /**
603   Helper function for create_tmp_table().
604 
605   Insert a field at the head of the hidden field area.
606 
607   @param table            Temporary table
608   @param default_field    Default value array pointer
609   @param from_field       Original field array pointer
610   @param blob_field       Array pointer to record fields index of blob type
  @param field            The hidden field to register
612  */
613 
register_hidden_field(TABLE * table,Field ** default_field,Field ** from_field,uint * blob_field,Field * field)614 static void register_hidden_field(TABLE *table, Field **default_field,
615                                   Field **from_field, uint *blob_field,
616                                   Field *field) {
617   uint i;
618   Field **tmp_field = table->field;
619 
620   /* Increase all of registed fields index */
621   for (i = 0; i < table->s->fields; i++)
622     tmp_field[i]->set_field_index(tmp_field[i]->field_index() + 1);
623 
624   // Increase the field_index of visible blob field
625   for (i = 0; i < table->s->blob_fields; i++) blob_field[i]++;
626   // Insert field
627   table->field[-1] = field;
628   default_field[-1] = nullptr;
629   from_field[-1] = nullptr;
630   field->table = table;
631   field->orig_table = table;
632   field->set_field_index(0);
633 
634   // Keep the field from being expanded by SELECT *.
635   field->set_hidden(dd::Column::enum_hidden_type::HT_HIDDEN_SQL);
636 }
637 
638 /**
639   Helper function which evaluates correct TABLE_SHARE::real_row_type
640   for the temporary table.
641 */
set_real_row_type(TABLE * table)642 static void set_real_row_type(TABLE *table) {
643   HA_CREATE_INFO create_info;
644   create_info.row_type = table->s->row_type;
645   create_info.options |=
646       HA_LEX_CREATE_TMP_TABLE | HA_LEX_CREATE_INTERNAL_TMP_TABLE;
647   create_info.table_options = table->s->db_create_options;
648   table->s->real_row_type = table->file->get_real_row_type(&create_info);
649 }
650 
651 /**
652   Moves to the end of the 'copy_func' array the elements which contain a
653   reference to an expression of the SELECT list of 'select'.
654   @param[in,out]  copy_func  array to sort
655   @param          select     query block to search in.
656 */
sort_copy_func(const SELECT_LEX * select,Func_ptr_array * copy_func)657 static void sort_copy_func(const SELECT_LEX *select,
658                            Func_ptr_array *copy_func) {
659   /*
660     In the select->all_fields list, there are hidden elements first, then
661     non-hidden. Non-hidden are those of the SELECT list. Hidden ones are:
662     (a) those of GROUP BY, HAVING, ORDER BY
663     (b) those which have been extracted from higher-level elements (of the
664     SELECT, GROUP BY, etc) by split_sum_func() (when aggregates are
665     involved).
666 
667     Note that the clauses in (a) are allowed to reference a non-hidden
668     expression through an alias (e.g. "SELECT a+2 AS x GROUP BY x+3"). The
669     clauses in (b) can reference non-hidden expressions without aliases if they
670     have been generated in a query transformation (for example when transforming
671     an IN subquery to a correlated EXISTS subquery ("(x, y) IN (SELECT expr1,
672     expr2 ...)" -> "EXISTS (SELECT * ... HAVING x = expr1 AND y = expr2 ...").
673 
674     Let's go through the process of writing to the tmp table
675     (e.g. end_write(), end_write_group()). We also include here the
676     "pseudo-tmp table" embedded into REF_SLICE_ORDERED_GROUP_BY, used by
677     end_send_group().
678     (1) we switch to the REF_SLICE used to read from that tmp table
679     (2.1) we (copy_fields() part 1) copy some columns from the
680     output of the previous step of execution (e.g. the join's output) to the
681     tmp table
682     (2.2) (specifically for REF_SLICE_ORDERED_GROUP_BY in end_send_group()) we
683     (copy_fields() part 2) evaluate some expressions from the same previous
684     step of execution, with Item_copy::copy(). The mechanism of Item_copy is:
685     * copy() evaluates the expression and caches its value in memory
686     * val_*() returns the cached value;
687     so Item_copy::copy() for "a+2" evaluates "a+2" (using the join's value
688     of "a") and caches the value; then Item_copy::copy() for "x+3" evaluates
689     "x", through Item_ref (because of the alias), that Item_ref points to
690     the Item_copy for "a+2" (does not point to the "a+2" Item_func_plus
691     expression, as we advanced the REF_SLICE to TMP3); copy() on
692     "x+3" thus evaluates the Item_copy for "a+2" which returns the cached value.
693     This way, if "a+2" were rather some non-deterministic expression
694     (e.g. rand()), the logic above does only one evaluation of rand(), which is
695     correct (the two objects "x" and "a+2" in 'fields' thus have equal
696     values).
697     For this to work, the Item_copy for "x" must be copy()d after that
698     of "a+2", so it can use the value cached for "a+2". setup_copy_fields()
699     ensures this by putting Item_copy-s of hidden elements last.
700     (3) We are now done with copy_fields(). Next is copy_funcs(). It
701     is meant to evaluate expressions and store their values into the tmp table.
702     [ note that we could replace Item_copy in (2) with a real one-row tmp
703     table; then end_send_group() could just use copy_funcs() instead of
704     Item_copy: copy_funcs() would store into the tmp table's column which
705     would thus be the storage for the cached value ].
706     Because we advanced the REF_SLICE, when copy_funcs() evaluates an
707     expression which uses Item_ref, that Item_ref may point to a column of
708     the tmp table. It is thus important that this column has been filled
709     already. So the order of evaluation of expressions by copy_funcs() must
710     respect "dependencies".
711 
712     It is incorrect to evaluate elements of (a) first if they refer to
713     non-hidden elements through aliases. It is incorrect to evaluate elements of
714     (b) first if they refer to non-hidden elements. So, we partition the
715     elements below, moving to the end the ones which reference other expressions
716     in the same query block. We use a stable partitioning
717     (std::stable_partition), to avoid disturbing any dependency already
718     reflected in the order.
719 
720     A simpler and more robust solution would be to break the design that
721     hidden elements are always first in SELECT_LEX::all_fields: references
722     using aliases (in GROUP BY, HAVING, ORDER BY) would be added to
723     all_fields last (after the SELECT list); an inner element (split by
724     split_sum_func) would be added right before its containing element. That
725     would reflect dependencies naturally. But it is hard to implement, as
726     some code relies on the fact that non-hidden elements are last, and
727     other code relies on the fact that SELECT::fields is just a part of
728     SELECT::all_fields (i.e. they share 'next' pointers, in the
729     implementation).
730 
731     You may wonder why setup_copy_fields() can solve the dependency problem
732     by putting all hidden elements last, while for the copy_func array we
733     have a (more complex) sort. It's because setup_copy_fields() is for
734     end_send_group() which handles only queries with GROUP BY without ORDER
735     BY, window functions or DISTINCT. So the hidden elements produced by
736     split_sum_func are only group aggregates (not anything from WFs), which
737     setup_copy_fields() ignores: these aggregates are thus not cached
738     (neither in Item_copy, nor in a further tmp table's row as there's no tmp
739     table); so any parent item which references them,
740     if evaluated, will reach to the aggregate, not to any cache
741     materializing the aggregate, so will get an up-to-date value.
742     Whereas with window functions, it's possible to have a hidden element be an
743     aggregate (produced by split_sum_func) _and_ be materialized (into a
744     further tmp table), so we cannot ignore such Item anymore: we have to
745     leave it at the beginning of the copy_func array. Except if it contains
746     an alias to an expression of the SELECT list: in that case, the sorting
747     will move it to the end, but will also move the aliased expression, and
748     their relative order will remain unchanged thanks to stable_partition, so
749     their evaluation will be in the right order.
750 
751     So we walk each item to copy, put the ones that don't reference other
752     expressions in the query block first, and put those that reference other
753     expressions last.
754   */
755   const auto without_reference_to_select_expr = [select](const Func_ptr &ptr) {
756     // We cast 'const' away, but the walker will not modify '*select'.
757     uchar *walk_arg = const_cast<uchar *>(pointer_cast<const uchar *>(select));
758     return !ptr.func()->walk(&Item::references_select_expr_of,
759                              // the reference might be in a subquery
760                              enum_walk::SUBQUERY_PREFIX, walk_arg);
761   };
762   std::stable_partition(copy_func->begin(), copy_func->end(),
763                         without_reference_to_select_expr);
764 }
765 
766 /**
767   Helper function for create_tmp_table_* family for setting tmp table fields
768   to their place in record buffer
769 
770   @param field      field to set
771   @param pos        field's position in table's record buffer
772   @param null_flags beginning of table's null bits buffer
773   @param null_count  field's null bit in null bits buffer
774 */
775 
relocate_field(Field * field,uchar * pos,uchar * null_flags,uint * null_count)776 inline void relocate_field(Field *field, uchar *pos, uchar *null_flags,
777                            uint *null_count) {
778   if (!field->is_flag_set(NOT_NULL_FLAG)) {
779     field->move_field(pos, null_flags + *null_count / 8,
780                       (uint8)1 << (*null_count & 7));
781     (*null_count)++;
782   } else
783     field->move_field(pos, nullptr, 0);
784   if (field->type() == MYSQL_TYPE_BIT) {
785     /* We have to reserve place for extra bits among null bits */
786     ((Field_bit *)field)
787         ->set_bit_ptr(null_flags + *null_count / 8, *null_count & 7);
788     (*null_count) += (field->field_length & 7);
789   }
790   field->reset();
791 }
792 
793 /**
794   Create a temp table according to a field list.
795 
796   Given field pointers are changed to point at tmp_table for
797   send_result_set_metadata. The table object is self contained: it's
798   allocated in its own memory root, as well as Field objects
799   created for table columns. Those Field objects are common to TABLE and
800   TABLE_SHARE.
801   This function will replace Item_sum items in 'fields' list with
802   corresponding Item_field items, pointing at the fields in the
803   temporary table, unless save_sum_fields is set to false.
804   The Item_field objects are created in THD memory root.
805 
806   @param thd                  thread handle
807   @param param                a description used as input to create the table
808   @param fields               list of items that will be used to define
809                               column types of the table (also see NOTES)
  @param group                Group key to use for temporary table, NULL if
                              none
812   @param distinct             should table rows be distinct
813   @param save_sum_fields      see NOTES
814   @param select_options
815   @param rows_limit
816   @param table_alias          possible name of the temporary table that can
817                               be used for name resolving; can be "".
818 
819   @remark mysql_create_view() checks that views have less than
820           MAX_FIELDS columns.
821 
822   @remark We may actually end up with a table without any columns at all.
823           See comment below: We don't have to store this.
824 */
825 
/*
  Thresholds used by create_tmp_table() when deciding whether a table without
  BLOB columns should use packed rows. "String length" below is the summed
  pack_length() of the STRING/VARCHAR columns of the table.
*/
// Packed rows are considered only if the total string length reaches this.
#define STRING_TOTAL_LENGTH_TO_PACK_ROWS 128
// Pack if the average string length per string column reaches this.
#define AVG_STRING_LENGTH_TO_PACK_ROWS 64
// Pack if (record length / total string length) does not exceed this.
#define RATIO_TO_PACK_ROWS 2
829 
create_tmp_table(THD * thd,Temp_table_param * param,List<Item> & fields,ORDER * group,bool distinct,bool save_sum_fields,ulonglong select_options,ha_rows rows_limit,const char * table_alias)830 TABLE *create_tmp_table(THD *thd, Temp_table_param *param, List<Item> &fields,
831                         ORDER *group, bool distinct, bool save_sum_fields,
832                         ulonglong select_options, ha_rows rows_limit,
833                         const char *table_alias) {
834   DBUG_TRACE;
835   if (!param->allow_group_via_temp_table)
836     group = nullptr;  // Can't use group key
837 
838   if (group != nullptr) distinct = false;  // Can't use distinct
839 
840   for (ORDER *tmp = group; tmp; tmp = tmp->next) {
841     /*
842       marker == MARKER_BIT means two things:
843       - store NULLs in the key, and
844       - convert BIT fields to 64-bit long, needed because MEMORY tables
845         can't index BIT fields.
846     */
847     (*tmp->item)->marker = Item::MARKER_BIT;
848   }
849 
850   /**
851     When true, enforces unique constraint (by adding a hidden hash_field and
852     creating a key over this field) when:
853     (1) unique key is too long, or
854     (2) number of key parts in distinct key is too big, or
855     (3) the caller has requested it.
856   */
857   bool unique_constraint_via_hash_field = false;
858 
859   /*
860     When loose index scan is employed as access method, it already
861     computes all groups and the result of all aggregate functions. We
862     make space for the items of the aggregate function in the list of
863     functions Temp_table_param::items_to_copy, so that the values of
864     these items are stored in the temporary table.
865   */
866   uint copy_func_count = param->func_count;
867   if (param->precomputed_group_by) copy_func_count += param->sum_func_count;
868   /* Treat sum functions as normal ones when loose index scan is used. */
869   save_sum_fields |= param->precomputed_group_by;
870 
871   // 4096 since (sizeof(TABLE) + sizeof(TABLE_SHARE) ~= 3KB)
872   MEM_ROOT own_root(key_memory_TABLE, 4096);
873 
874   param->keyinfo = static_cast<KEY *>(own_root.Alloc(sizeof(*param->keyinfo)));
875 
876   const uint field_count =
877       param->field_count + param->func_count + param->sum_func_count;
878   try {
879     param->copy_fields.reserve(field_count);
880   } catch (std::bad_alloc &) {
881     return nullptr;
882   }
883 
884   TABLE_SHARE *share = new (&own_root) TABLE_SHARE;
885   TABLE *table = new (&own_root) TABLE;
886   if (table == nullptr || share == nullptr) return nullptr;
887 
888   // NOTE: reg_field/default_field/from_field correspond 1:1 to each other,
889   // except that reg_field contains an extra nullptr marker at the end.
890   // (They should have been a struct, but we cannot, since the reg_field
891   // array ends up in the TABLE object, which expects a flat array.)
892   // blob_field is a separate array, which indexes into these.
893   Field **reg_field = own_root.ArrayAlloc<Field *>(field_count + 2);
894   Field **default_field = own_root.ArrayAlloc<Field *>(field_count + 1);
895   Field **from_field = own_root.ArrayAlloc<Field *>(field_count + 1);
896   uint *blob_field = own_root.ArrayAlloc<uint>(field_count + 2);
897   if (reg_field == nullptr || default_field == nullptr ||
898       from_field == nullptr || blob_field == nullptr)
899     return nullptr;
900   memset(reg_field, 0, sizeof(Field *) * (field_count + 2));
901   memset(default_field, 0, sizeof(Field *) * (field_count + 1));
902   memset(from_field, 0, sizeof(Field *) * (field_count + 1));
903 
904   // Leave the first place to be prepared for hash_field
905   reg_field++;
906   default_field++;
907   from_field++;
908   table->init_tmp_table(thd, share, &own_root, param->table_charset,
909                         table_alias, reg_field, blob_field, false);
910 
911   auto free_tmp_table_guard =
912       create_scope_guard([thd, table] { free_tmp_table(thd, table); });
913 
914   /*
915     We will use TABLE_SHARE's MEM_ROOT for all allocations, so TABLE's
916     MEM_ROOT remains uninitialized.
917     TABLE_SHARE's MEM_ROOT is a copy of own_root, upon error free_tmp_table()
918     will free it.
919   */
920   Swap_mem_root_guard mem_root_guard(thd, &share->mem_root);
921 
922   param->items_to_copy =
923       new (&share->mem_root) Func_ptr_array(&share->mem_root);
924   if (param->items_to_copy == nullptr) return nullptr; /* purecov: inspected */
925   if (param->items_to_copy->reserve(copy_func_count)) return nullptr;
926 
927   if (param->schema_table) share->db = INFORMATION_SCHEMA_NAME;
928 
929   /* Calculate which type of fields we will store in the temporary table */
930 
931   share->reclength = 0;
932   ulong string_total_length = 0;
933   ulong distinct_key_length = 0;
934   uint null_count = 0;
935   uint hidden_null_count = 0;
936   share->blob_fields = 0;
937   uint group_null_items = 0;
938   uint string_count = 0;
939   uint fieldnr = 0;
940   param->using_outer_summary_function = false;
941   long hidden_field_count = param->hidden_field_count;
942   const bool not_all_columns = !(select_options & TMP_TABLE_ALL_COLUMNS);
943   /*
944     total_uneven_bit_length is uneven bit length for visible fields
945     hidden_uneven_bit_length is uneven bit length for hidden fields
946   */
947   uint total_uneven_bit_length = 0;
948   uint hidden_uneven_bit_length = 0;
949 
950   for (Item &refitem : fields) {
951     Item *item = &refitem;
952     Item::Type type = item->type();
953     const bool is_sum_func =
954         type == Item::SUM_FUNC_ITEM && !item->m_is_window_function;
955 
956     if (type == Item::COPY_STR_ITEM) {
957       item = down_cast<Item_copy *>(item)->get_item();
958       type = item->type();
959     }
960 
961     bool store_column = true;
962     if (not_all_columns) {
963       if (item->has_aggregation() && type != Item::SUM_FUNC_ITEM) {
964         if (item->used_tables() & OUTER_REF_TABLE_BIT)
965           item->update_used_tables();
966         if (type == Item::SUBSELECT_ITEM ||
967             (item->used_tables() & ~OUTER_REF_TABLE_BIT)) {
968           /*
969             Mark that we have ignored an item that refers to a summary
970             function. We need to know this if someone is going to use
971             DISTINCT on the result.
972           */
973           param->using_outer_summary_function = true;
974           store_column = false;
975         }
976       } else if (item->m_is_window_function) {
977         if (!param->m_window || param->m_window_frame_buffer) {
978           /*
979             A pre-windowing table; no point in storing WF.
980             Or a window's frame buffer:
981             - the window's WFs cannot be calculated yet
982             - same for later windows' WFs
983             - previous windows' WFs are already replaced with Item_field (so
984             don't come here).
985           */
986           store_column = false;
987         } else if (param->m_window != down_cast<Item_sum *>(item)->window()) {
988           // A later window's WF: no point in storing it in this table.
989           store_column = false;
990         }
991       } else if (item->has_wf()) {
992         /*
993           A non-WF expression containing a WF conservatively requires all
994           windows to have been processed, and is not stored in any of
995           windowing tables until the last one.
996         */
997         if (param->m_window == nullptr || !param->m_window->is_last())
998           store_column = false;
999       }
1000       if (item->const_item() && hidden_field_count <= 0)
1001         continue;  // We don't have to store this
1002     }
1003 
1004     if (store_column && is_sum_func && !group &&
1005         !save_sum_fields) { /* Can't calc group yet */
1006       Item_sum *sum_item = down_cast<Item_sum *>(item);
1007       for (uint i = 0; i < sum_item->get_arg_count(); i++) {
1008         DBUG_ASSERT(!distinct);
1009         Item *arg = sum_item->get_arg(i);
1010         if (!arg->const_item()) {
1011           Field *new_field = create_tmp_field(
1012               thd, table, arg, arg->type(), param->items_to_copy,
1013               &from_field[fieldnr], &default_field[fieldnr], group != nullptr,
1014               not_all_columns, false, false, false);
1015           if (new_field == nullptr) return nullptr;  // Should be OOM
1016           new_field->set_field_index(fieldnr);
1017           reg_field[fieldnr++] = new_field;
1018           share->reclength += new_field->pack_length();
1019           if (new_field->is_flag_set(BLOB_FLAG)) {
1020             *blob_field++ = new_field->field_index();
1021             share->blob_fields++;
1022           }
1023           if (new_field->type() == MYSQL_TYPE_BIT)
1024             total_uneven_bit_length += new_field->field_length & 7;
1025           if (new_field->real_type() == MYSQL_TYPE_STRING ||
1026               new_field->real_type() == MYSQL_TYPE_VARCHAR) {
1027             string_count++;
1028             string_total_length += new_field->pack_length();
1029           }
1030 
1031           thd->mem_root = mem_root_guard.old_mem_root();
1032           arg = sum_item->set_arg(i, thd, new Item_field(new_field));
1033           thd->mem_root = &share->mem_root;
1034 
1035           if (!new_field->is_flag_set(NOT_NULL_FLAG)) {
1036             null_count++;
1037             /*
1038               new_field->maybe_null() is still false, it will be
1039               changed below. But we have to setup Item_field correctly
1040             */
1041             arg->maybe_null = true;
1042           }
1043           /* InnoDB temp table doesn't allow field with empty_name */
1044           if (!new_field->field_name)
1045             new_field->field_name = create_tmp_table_field_tmp_name(thd, item);
1046         }
1047       }
1048     } else if (store_column) {
1049       Field *new_field;
1050       if (param->schema_table) {
1051         new_field = create_tmp_field_for_schema(item, table);
1052       } else {
1053         /*
1054           Parameters of create_tmp_field():
1055 
1056           (1) is a bit tricky:
1057           We need to set it to 0 in union, to get fill_record() to modify the
1058           temporary table.
1059           We need to set it to 1 on multi-table-update and in select to
1060           write rows to the temporary table.
          We here distinguish between UNION and multi-table-updates by the fact
          that in the latter case group is set to the row pointer.
          (2) If item->marker == MARKER_BIT then we force create_tmp_field
          to create 64-bit longs for BIT fields because HEAP
          tables can't index BIT fields directly. We do the same
1066           for distinct, as we want the distinct index to be
1067           usable in this case too.
1068           (3) This is the OUT table of windowing, there is a frame buffer, and
1069           the item is an expression which can store its value in a result_field
1070           (e.g. it is Item_func). In that case we pass copy_result_field=true.
1071         */
1072         new_field = create_tmp_field(
1073             thd, table, item, type, param->items_to_copy, &from_field[fieldnr],
1074             &default_field[fieldnr],
1075             group != nullptr,  // (1)
1076             !param->force_copy_fields && (not_all_columns || group != nullptr),
1077             item->marker == Item::MARKER_BIT ||
1078                 param->bit_fields_as_long,  //(2)
1079             param->force_copy_fields,
1080             (param->m_window &&  // (3)
1081              param->m_window->frame_buffer_param() && item->is_result_field()));
1082       }
1083 
1084       if (!new_field) {
1085         DBUG_ASSERT(thd->is_fatal_error());
1086         return nullptr;  // Got OOM
1087       }
1088       /*
        Some group aggregate functions use result_field to maintain their
1090         current value (e.g. Item_avg_field stores both count and sum there).
1091         But only for the group-by table. So do not set result_field if this is
1092         a tmp table for UNION or derived table materialization.
1093       */
1094       if (not_all_columns && type == Item::SUM_FUNC_ITEM)
1095         down_cast<Item_sum *>(item)->set_result_field(new_field);
1096       share->reclength += new_field->pack_length();
1097       if (!new_field->is_flag_set(NOT_NULL_FLAG)) null_count++;
1098       if (new_field->type() == MYSQL_TYPE_BIT)
1099         total_uneven_bit_length += new_field->field_length & 7;
1100       if (new_field->is_flag_set(BLOB_FLAG)) {
1101         *blob_field++ = fieldnr;
1102         share->blob_fields++;
1103       }
1104 
1105       if (new_field->real_type() == MYSQL_TYPE_STRING ||
1106           new_field->real_type() == MYSQL_TYPE_VARCHAR) {
1107         string_count++;
1108         string_total_length += new_field->pack_length();
1109       }
1110       // In order to reduce footprint ask SE to pack variable-length fields.
1111       if (new_field->type() == MYSQL_TYPE_VAR_STRING ||
1112           new_field->type() == MYSQL_TYPE_VARCHAR)
1113         table->s->db_create_options |= HA_OPTION_PACK_RECORD;
1114 
1115       if (item->marker == Item::MARKER_BIT && item->maybe_null) {
1116         group_null_items++;
1117         new_field->set_flag(GROUP_FLAG);
1118       }
1119       new_field->set_field_index(fieldnr);
1120       reg_field[fieldnr++] = new_field;
1121       /* InnoDB temp table doesn't allow field with empty_name */
1122       if (!new_field->field_name) {
1123         new_field->field_name = create_tmp_table_field_tmp_name(thd, item);
1124       }
1125 
1126       /*
1127         Calculate length of distinct key. The goal is to decide what to use -
1128         key or unique constraint. As blobs force unique constraint on their
1129         own due to their length, they aren't taken into account.
1130       */
1131       if (distinct && hidden_field_count <= 0) {
1132         if (new_field->is_flag_set(BLOB_FLAG))
1133           unique_constraint_via_hash_field = true;
1134         else
1135           distinct_key_length += new_field->pack_length();
1136       }
1137     }
1138 
1139     hidden_field_count--;
1140     if (hidden_field_count == 0) {
1141       /*
1142         This was the last hidden field; Remember how many hidden fields could
1143         have null
1144       */
1145       hidden_null_count = null_count;
1146       /*
1147         We need to update hidden_field_count as we may have stored group
1148         functions with constant arguments
1149       */
1150       param->hidden_field_count = fieldnr;
1151       null_count = 0;
1152       /*
1153         On last hidden field we store uneven bit length in
1154         hidden_uneven_bit_length and proceed calculation of
1155         uneven bits for visible fields into
1156         total_uneven_bit_length variable.
1157       */
1158       hidden_uneven_bit_length = total_uneven_bit_length;
1159       total_uneven_bit_length = 0;
1160     }
1161   }  // end of for
1162 
1163   DBUG_ASSERT(field_count >= fieldnr);
1164 
1165   reg_field[fieldnr] = nullptr;
1166   *blob_field = 0;  // End marker
1167   share->fields = fieldnr;
1168 
  /*
    Different temp table engines support different max_key_length
    and max_key_part_length values. If the HEAP engine is selected, the
    table may later be converted to an on-disk engine. We must therefore
    use the minimum of max_key_length and max_key_part_length across the
    HEAP engine and the possible on-disk engine when deciding whether a
    unique constraint is needed, so that the conversion goes well.
   */
1177   uint max_key_length;
1178   uint max_key_part_length;
1179   uint max_key_parts;
1180   get_max_key_and_part_length(&max_key_length, &max_key_part_length,
1181                               &max_key_parts);
1182 
1183   if (group) {
1184     DBUG_PRINT("info", ("Creating group key in temporary table"));
1185     table->group = group; /* Table is grouped by key */
1186     share->keys = 1;
1187     // Let each group expression know the column which materializes its value
1188     for (ORDER *cur_group = group; cur_group; cur_group = cur_group->next) {
1189       Field *field = (*cur_group->item)->get_tmp_table_field();
1190       DBUG_ASSERT(field->table == table);
1191       cur_group->field_in_tmp_table = field;
1192 
1193       if ((*cur_group->item)->max_char_length() > CONVERT_IF_BIGGER_TO_BLOB)
1194         unique_constraint_via_hash_field = true;
1195     }
1196     if (param->group_parts > max_key_parts ||
1197         param->group_length > max_key_length ||
1198         param->group_length >= MAX_BLOB_WIDTH)
1199       unique_constraint_via_hash_field = true;
1200     // Use key definition created below only if the key isn't too long.
1201     // Otherwise a dedicated key over a hash value will be created and this
1202     // definition will be used by server to calc hash.
1203     if (!unique_constraint_via_hash_field) {
1204       param->keyinfo->table = table;
1205       param->keyinfo->is_visible = true;
1206       KEY_PART_INFO *key_part_info =
1207           share->mem_root.ArrayAlloc<KEY_PART_INFO>(param->group_parts + 1);
1208       if (key_part_info == nullptr) return nullptr;
1209       param->keyinfo->key_part = key_part_info;
1210       param->keyinfo->flags = HA_NOSAME;
1211       param->keyinfo->actual_flags = param->keyinfo->flags;
1212       param->keyinfo->usable_key_parts = param->group_parts;
1213       param->keyinfo->user_defined_key_parts = param->group_parts;
1214       param->keyinfo->actual_key_parts = param->keyinfo->user_defined_key_parts;
1215       param->keyinfo->rec_per_key = nullptr;
1216       // keyinfo->algorithm is set later, when storage engine is known
1217       param->keyinfo->set_rec_per_key_array(nullptr, nullptr);
1218       param->keyinfo->set_in_memory_estimate(IN_MEMORY_ESTIMATE_UNKNOWN);
1219       param->keyinfo->name = "<group_key>";
1220       for (ORDER *cur_group = group; cur_group;
1221            cur_group = cur_group->next, key_part_info++) {
1222         Field *field = cur_group->field_in_tmp_table;
1223         key_part_info->init_from_field(field);
1224 
1225         /* In GROUP BY 'a' and 'a ' are equal for VARCHAR fields */
1226         key_part_info->key_part_flag |= HA_END_SPACE_ARE_EQUAL;
1227 
1228         if (key_part_info->store_length > max_key_part_length) {
1229           unique_constraint_via_hash_field = true;
1230           break;
1231         }
1232       }
1233       table->key_info = param->keyinfo;
1234       share->key_info = param->keyinfo;
1235       share->key_parts = param->keyinfo->user_defined_key_parts;
1236     }
1237   } else if (distinct && share->fields != param->hidden_field_count) {
1238     /*
      Create a unique key or a unique constraint over all columns
1240       that should be in the result.  In the temporary table, there are
1241       'param->hidden_field_count' extra columns, whose null bits are stored
1242       in the first 'hidden_null_pack_length' bytes of the row.
1243     */
1244     DBUG_PRINT("info", ("hidden_field_count: %d", param->hidden_field_count));
1245     share->keys = 1;
1246     table->is_distinct = true;
1247     if (!unique_constraint_via_hash_field) {
1248       param->keyinfo->table = table;
1249       param->keyinfo->is_visible = true;
1250       param->keyinfo->user_defined_key_parts =
1251           share->fields - param->hidden_field_count;
1252       param->keyinfo->actual_key_parts = param->keyinfo->user_defined_key_parts;
1253       KEY_PART_INFO *key_part_info = share->mem_root.ArrayAlloc<KEY_PART_INFO>(
1254           param->keyinfo->user_defined_key_parts);
1255       if (key_part_info == nullptr) return nullptr;
1256       param->keyinfo->key_part = key_part_info;
1257       param->keyinfo->flags = HA_NOSAME | HA_NULL_ARE_EQUAL;
1258       param->keyinfo->actual_flags = param->keyinfo->flags;
1259       param->keyinfo->name = "<auto_distinct_key>";
1260       // keyinfo->algorithm is set later, when storage engine is known
1261       param->keyinfo->set_in_memory_estimate(IN_MEMORY_ESTIMATE_UNKNOWN);
1262 
1263       // Set up records-per-key estimates.
1264       ulong *rec_per_key = share->mem_root.ArrayAlloc<ulong>(
1265           param->keyinfo->user_defined_key_parts);
1266       rec_per_key_t *rec_per_key_float =
1267           share->mem_root.ArrayAlloc<rec_per_key_t>(
1268               param->keyinfo->user_defined_key_parts);
1269       if (rec_per_key == nullptr || rec_per_key_float == nullptr)
1270         return nullptr;
1271       param->keyinfo->set_rec_per_key_array(rec_per_key, rec_per_key_float);
1272       for (unsigned key_part_idx = 0;
1273            key_part_idx < param->keyinfo->user_defined_key_parts;
1274            ++key_part_idx) {
1275         param->keyinfo->rec_per_key[key_part_idx] = 0;
1276         param->keyinfo->set_records_per_key(key_part_idx, REC_PER_KEY_UNKNOWN);
1277       }
1278 
1279       /* Create a distinct key over the columns we are going to return */
1280       for (unsigned i = param->hidden_field_count; i < share->fields;
1281            i++, key_part_info++) {
1282         key_part_info->init_from_field(table->field[i]);
1283         if (key_part_info->store_length > max_key_part_length) {
1284           unique_constraint_via_hash_field = true;
1285           break;
1286         }
1287       }
1288       table->key_info = param->keyinfo;
1289       share->key_info = param->keyinfo;
1290       share->key_parts = param->keyinfo->user_defined_key_parts;
1291     }
1292   }
1293 
1294   /*
1295     To enforce unique constraint we need to add a field to hold key's hash
1296     A1) distinct key is too long
1297     A2) number of keyparts in distinct key is too big
1298     A3) caller cannot accept distinct via indexes (e.g. because it wants
1299         to turn off the checking at some point)
1300   */
1301   if (distinct) {
1302     if (distinct_key_length > max_key_length ||                   // 1
1303         (fieldnr - param->hidden_field_count) > max_key_parts ||  // 2
1304         param->force_hash_field_for_unique) {                     // 3
1305       unique_constraint_via_hash_field = true;
1306     }
1307   }
1308 
1309   if (unique_constraint_via_hash_field) {
1310     Field_longlong *field = new (&share->mem_root)
1311         Field_longlong(sizeof(ulonglong), false, "<hash_field>", true);
1312     if (!field) {
1313       /* purecov: begin inspected */
1314       DBUG_ASSERT(thd->is_fatal_error());
1315       return nullptr;  // Got OOM
1316                        /* purecov: end */
1317     }
1318 
1319     // Mark hash_field as NOT NULL
1320     field->set_flag(NOT_NULL_FLAG);
1321     // Register hash_field as a hidden field.
1322     register_hidden_field(table, &default_field[0], &from_field[0],
1323                           share->blob_field, field);
1324     // Repoint arrays
1325     table->field--;
1326     default_field--;
1327     from_field--;
1328     share->reclength += field->pack_length();
1329     share->fields = ++fieldnr;
1330     param->hidden_field_count++;
1331     share->field--;
1332     table->hash_field = field;
1333   }
1334 
1335   if (setup_tmp_table_handler(table, select_options, false,
1336                               param->schema_table))
1337     return nullptr; /* purecov: inspected */
1338 
1339   if (table->s->keys == 1 && table->key_info)
1340     table->key_info->algorithm = table->file->get_default_index_algorithm();
1341 
1342   table->hidden_field_count = param->hidden_field_count;
1343 
1344   if (!unique_constraint_via_hash_field)
1345     share->reclength += group_null_items;  // null flag is stored separately
1346 
1347   if (share->blob_fields == 0) {
1348     /* We need to ensure that first byte is not 0 for the delete link */
1349     if (param->hidden_field_count)
1350       hidden_null_count++;
1351     else
1352       null_count++;
1353   }
1354   uint hidden_null_pack_length =
1355       (hidden_null_count + 7 + hidden_uneven_bit_length) / 8;
1356   share->null_bytes = (hidden_null_pack_length +
1357                        (null_count + total_uneven_bit_length + 7) / 8);
1358   share->reclength += share->null_bytes;
1359   if (share->reclength == 0) share->reclength = 1;  // Dummy select
1360 
1361   share->null_fields = null_count + hidden_null_count;
1362 
1363   if (alloc_record_buffers(table)) return nullptr;
1364 
1365   uchar *pos = table->record[0] + share->null_bytes;
1366   null_count = (share->blob_fields == 0) ? 1 : 0;
1367   hidden_field_count = param->hidden_field_count;
1368   DBUG_ASSERT((uint)hidden_field_count <= share->fields);
1369   for (uint i = 0; i < share->fields; i++) {
1370     Field *field = table->field[i];
1371 
1372     if (!field->is_flag_set(NOT_NULL_FLAG)) {
1373       if (field->is_flag_set(GROUP_FLAG) && !unique_constraint_via_hash_field) {
1374         /*
1375           We have to reserve one byte here for NULL bits,
1376           as this is updated by 'end_update()'
1377         */
1378         *pos++ = 0;  // Null is stored here
1379       }
1380     }
1381     relocate_field(field, pos, table->record[0], &null_count);
1382     pos += field->pack_length();
1383     if (!--hidden_field_count)
1384       null_count = (null_count + 7) & ~7;  // move to next byte
1385   }
1386 
  /* Use packed rows if there are blobs or a lot of space to gain */
1388   bool use_packed_rows = false;
1389   if (share->blob_fields != 0 ||
1390       (string_total_length >= STRING_TOTAL_LENGTH_TO_PACK_ROWS &&
1391        (share->reclength / string_total_length <= RATIO_TO_PACK_ROWS ||
1392         string_total_length / string_count >= AVG_STRING_LENGTH_TO_PACK_ROWS)))
1393     use_packed_rows = true;
1394 
1395   if (!use_packed_rows) share->db_create_options &= ~HA_OPTION_PACK_RECORD;
1396 
1397   param->func_count = param->items_to_copy->size();
1398   DBUG_ASSERT(param->func_count <= copy_func_count);  // Used <= allocated
1399   sort_copy_func(thd->lex->current_select(), param->items_to_copy);
1400   uchar *bitmaps = static_cast<uchar *>(
1401       share->mem_root.Alloc(bitmap_buffer_size(field_count + 1) * 3));
1402   if (bitmaps == nullptr) return nullptr;
1403   setup_tmp_table_column_bitmaps(table, bitmaps);
1404 
1405   for (uint i = 0; i < share->fields; i++) {
1406     Field *field = table->field[i];
    /*
      Test if there is a default field value. The test for field_ptr() is to
      skip 'offset' fields generated by initialize_tables
    */
1411     if (default_field[i] && default_field[i]->field_ptr() != nullptr) {
1412       /*
1413          default_field[i] is set only in the cases  when 'field' can
1414          inherit the default value that is defined for the field referred
1415          by the Item_field object from which 'field' has been created.
1416       */
1417       Field *orig_field = default_field[i];
1418       /*
1419         Get the value from default_values. Note that orig_field->ptr might not
1420         point into record[0] if previous step is REF_SLICE_ORDERED_GROUP_BY and
1421         we are creating a tmp table to materialize the query's result.
1422       */
1423       ptrdiff_t diff = orig_field->table->default_values_offset();
1424       Field *f_in_record0 = orig_field->table->field[orig_field->field_index()];
1425       if (f_in_record0->is_real_null(diff))
1426         field->set_null();
1427       else {
1428         field->set_notnull();
1429         memcpy(field->field_ptr(), f_in_record0->field_ptr() + diff,
1430                field->pack_length());
1431       }
1432     }
1433 
1434     if (from_field[i]) {
1435       /* This column is directly mapped to a column in the GROUP BY clause. */
1436       if (param->m_window && param->m_window->frame_buffer_param() &&
1437           field->is_flag_set(FIELD_IS_MARKED)) {
1438         Temp_table_param *window_fb = param->m_window->frame_buffer_param();
1439         // Grep for FIELD_IS_MARKED in this file.
1440         field->is_flag_set(FIELD_IS_MARKED) ? field->clear_flag(FIELD_IS_MARKED)
1441                                             : field->set_flag(FIELD_IS_MARKED);
1442         window_fb->copy_fields.emplace_back(from_field[i], field,
1443                                             save_sum_fields);
1444       } else {
1445         param->copy_fields.emplace_back(field, from_field[i], save_sum_fields);
1446       }
1447     }
1448 
1449     // fix table name in field entry
1450     field->table_name = &table->alias;
1451   }
1452 
1453   store_record(table, s->default_values);  // Make empty default record
1454 
1455   /*
1456     Push the LIMIT clause to the temporary table creation, so that we
1457     materialize only up to 'rows_limit' records instead of all result records.
1458   */
1459   share->max_rows = std::min(share->max_rows, rows_limit);
1460   param->end_write_records = rows_limit;
1461 
1462   if (group && !unique_constraint_via_hash_field) {
1463     if (param->can_use_pk_for_unique) share->primary_key = 0;
1464     param->keyinfo->key_length = 0;  // Will compute the sum of the parts below.
1465     /*
1466       Here, we have to make the group fields point to the right record
1467       position.
1468     */
1469     KEY_PART_INFO *key_part_info = param->keyinfo->key_part;
1470     param->group_buff = share->mem_root.ArrayAlloc<uchar>(param->group_length);
1471     if (param->group_buff == nullptr) return nullptr;
1472     uchar *group_buff = param->group_buff;
1473     for (ORDER *cur_group = group; cur_group;
1474          cur_group = cur_group->next, key_part_info++) {
1475       Field *field = cur_group->field_in_tmp_table;
1476       const bool maybe_null = (*cur_group->item)->maybe_null;
1477       key_part_info->init_from_field(key_part_info->field);
1478       param->keyinfo->key_length += key_part_info->store_length;
1479 
1480       cur_group->buff = pointer_cast<char *>(group_buff);
1481       cur_group->field_in_tmp_table =
1482           field->new_key_field(thd->mem_root, table, group_buff + maybe_null);
1483 
1484       if (!cur_group->field_in_tmp_table)
1485         return nullptr; /* purecov: inspected */
1486 
1487       if (maybe_null) {
1488         /*
1489           To be able to group on NULL, we reserved place in group_buff
1490           for the NULL flag just before the column. (see above).
1491           The field data is after this flag.
1492           The NULL flag is updated in 'end_update()' and 'end_write()'
1493         */
1494         param->keyinfo->flags |= HA_NULL_ARE_EQUAL;  // def. that NULL == NULL
1495         cur_group->buff++;                           // Pointer to field data
1496         group_buff++;                                // Skipp null flag
1497       }
1498       group_buff += cur_group->field_in_tmp_table->pack_length();
1499     }
1500   }
1501 
1502   if (distinct && share->fields != param->hidden_field_count &&
1503       !unique_constraint_via_hash_field) {
1504     if (param->can_use_pk_for_unique) share->primary_key = 0;
1505     param->keyinfo->key_length = 0;  // Will compute the sum of the parts below.
1506     /*
1507       Here, we have to make the key fields point to the right record
1508       position.
1509     */
1510     KEY_PART_INFO *key_part_info = param->keyinfo->key_part;
1511     for (uint i = param->hidden_field_count; i < share->fields;
1512          i++, key_part_info++) {
1513       key_part_info->init_from_field(table->field[i]);
1514       param->keyinfo->key_length += key_part_info->store_length;
1515     }
1516   }
1517 
1518   // Create a key over hash_field to enforce unique constraint
1519   if (unique_constraint_via_hash_field) {
1520     KEY *hash_key;
1521     KEY_PART_INFO *hash_kpi;
1522 
1523     if (!multi_alloc_root(&share->mem_root, &hash_key, sizeof(*hash_key),
1524                           &hash_kpi, sizeof(*hash_kpi),  // Only one key part
1525                           NullS))
1526       return nullptr;
1527     table->key_info = share->key_info = hash_key;
1528     share->key_parts = 1;
1529     hash_key->table = table;
1530     hash_key->key_part = hash_kpi;
1531     hash_key->actual_flags = hash_key->flags = HA_NULL_ARE_EQUAL;
1532     hash_key->actual_key_parts = hash_key->usable_key_parts = 1;
1533     hash_key->user_defined_key_parts = 1;
1534     hash_key->set_rec_per_key_array(nullptr, nullptr);
1535     hash_key->algorithm = table->file->get_default_index_algorithm();
1536     hash_key->set_in_memory_estimate(IN_MEMORY_ESTIMATE_UNKNOWN);
1537     if (distinct)
1538       hash_key->name = "<hash_distinct_key>";
1539     else
1540       hash_key->name = "<hash_group_key>";
1541     hash_kpi->init_from_field(table->hash_field);
1542     hash_key->key_length = hash_kpi->store_length;
1543     param->keyinfo = hash_key;
1544   }
1545 
1546   if (thd->is_fatal_error())  // If end of memory
1547     return nullptr;           /* purecov: inspected */
1548 
1549   set_real_row_type(table);
1550 
1551   if (!param->skip_create_table) {
1552     if (instantiate_tmp_table(thd, table)) return nullptr;
1553   }
1554 
1555   DEBUG_SYNC(thd, "tmp_table_created");
1556 
1557   free_tmp_table_guard.commit();
1558 
1559   return table;
1560 }
1561 
1562 /**
1563   Create a temporary table to weed out duplicate rowid combinations
1564 
1565 
1566   @param    thd                    Thread handle
1567   @param    uniq_tuple_length_arg  Length of the table's column
1568   @param    sjtbl                  Update sjtbl->[start_]recinfo values which
1569                              will be needed if we'll need to convert the
1570                              created temptable from HEAP to MyISAM/Maria.
1571 
1572   @details
1573     create_duplicate_weedout_tmp_table()
1574 
1575     Create a temporary table to weed out duplicate rowid combinations. The
1576     table has a single column that is a concatenation of all rowids in the
1577     combination.
1578 
1579     Depending on the needed length, there are two cases:
1580 
1581     1. When the length of the column < max_key_length:
1582 
1583       CREATE TABLE tmp (col VARBINARY(n) NOT NULL, UNIQUE KEY(col));
1584 
1585     2. Otherwise (not a valid SQL syntax but internally supported):
1586 
1587       CREATE TABLE tmp (col VARBINARY NOT NULL, UNIQUE CONSTRAINT(col));
1588 
1589     The code in this function was produced by extraction of relevant parts
1590     from create_tmp_table().
1591 
1592   @return
1593     created table
1594     NULL on error
1595 */
1596 
create_duplicate_weedout_tmp_table(THD * thd,uint uniq_tuple_length_arg,SJ_TMP_TABLE * sjtbl)1597 TABLE *create_duplicate_weedout_tmp_table(THD *thd, uint uniq_tuple_length_arg,
1598                                           SJ_TMP_TABLE *sjtbl) {
1599   MEM_ROOT *mem_root_save, own_root;
1600   TABLE *table;
1601   TABLE_SHARE *share;
1602   Field **reg_field;
1603   KEY_PART_INFO *key_part_info;
1604   KEY *keyinfo;
1605   uchar *group_buff;
1606   uchar *bitmaps;
1607   uint *blob_field;
1608   bool unique_constraint_via_hash_field = false;
1609   Field *field, *key_field, *hash_field = nullptr;
1610   uint null_pack_length;
1611   uchar *null_flags;
1612   uchar *pos;
1613   uint i;
1614 
1615   DBUG_TRACE;
1616   DBUG_ASSERT(!sjtbl || !sjtbl->is_confluent);
1617 
1618   DBUG_EXECUTE_IF("create_duplicate_weedout_tmp_table_error", {
1619     my_error(ER_UNKNOWN_ERROR, MYF(0));
1620     return nullptr;
1621   });
1622 
1623   /* STEP 1: Figure if we'll be using a key or blob+constraint */
1624   if (uniq_tuple_length_arg > CONVERT_IF_BIGGER_TO_BLOB)
1625     unique_constraint_via_hash_field = true;
1626 
1627   /* STEP 2: Allocate memory for temptable description */
1628   init_sql_alloc(key_memory_TABLE, &own_root, TABLE_ALLOC_BLOCK_SIZE, 0);
1629   if (!multi_alloc_root(
1630           &own_root, &table, sizeof(*table), &share, sizeof(*share), &reg_field,
1631           sizeof(Field *) * (1 + 2), &blob_field, sizeof(uint) * 3, &keyinfo,
1632           sizeof(*keyinfo), &key_part_info, sizeof(*key_part_info) * 2,
1633           &group_buff,
1634           (!unique_constraint_via_hash_field ? uniq_tuple_length_arg : 0),
1635           &bitmaps, bitmap_buffer_size(1) * 3, NullS)) {
1636     return nullptr;
1637   }
1638 
1639   /* STEP 3: Create TABLE description */
1640   new (table) TABLE;
1641   memset(reg_field, 0, sizeof(Field *) * 3);
1642   table->init_tmp_table(thd, share, &own_root, nullptr, "weedout-tmp",
1643                         reg_field, blob_field, false);
1644 
1645   mem_root_save = thd->mem_root;
1646   thd->mem_root = &share->mem_root;
1647 
1648   uint reclength = 0;
1649   uint null_count = 0;
1650 
1651   /* Create the field */
1652   if (unique_constraint_via_hash_field) {
1653     Field_longlong *field_ll = new (&share->mem_root)
1654         Field_longlong(sizeof(ulonglong), false, "<hash_field>", true);
1655     if (!field_ll) {
1656       DBUG_ASSERT(thd->is_fatal_error());
1657       goto err;  // Got OOM
1658     }
1659     // Mark hash_field as NOT NULL
1660     field_ll->set_flag(NOT_NULL_FLAG);
1661     *(reg_field++) = hash_field = field_ll;
1662     if (sjtbl) sjtbl->hash_field = field_ll;
1663     table->hash_field = field_ll;
1664     field_ll->table = table;
1665     field_ll->orig_table = table;
1666     share->fields++;
1667     field_ll->set_field_index(0);
1668     reclength = field_ll->pack_length();
1669     table->hidden_field_count++;
1670   }
1671   {
1672     /*
1673       For the sake of uniformity, always use Field_varstring (altough we could
1674       use Field_string for shorter keys)
1675     */
1676     field = new (thd->mem_root) Field_varstring(
1677         uniq_tuple_length_arg, false, "rowids", share, &my_charset_bin);
1678     if (!field) return nullptr;
1679     field->table = table;
1680     field->auto_flags = Field::NONE;
1681     field->set_flag(NOT_NULL_FLAG);
1682     field->set_flag(BINARY_FLAG);
1683     field->set_flag(NO_DEFAULT_VALUE_FLAG);
1684     field->init(table);
1685     field->orig_table = nullptr;
1686     *(reg_field++) = field;
1687     *blob_field = 0;
1688     *reg_field = nullptr;
1689 
1690     field->set_field_index(share->fields);
1691     share->fields++;
1692     share->blob_fields = 0;
1693     reclength += field->pack_length();
1694     null_count++;
1695   }
1696 
1697   /* See also create_tmp_table() */
1698   if (setup_tmp_table_handler(table, 0LL, unique_constraint_via_hash_field,
1699                               false))
1700     goto err;
1701 
1702   null_pack_length = 1;
1703   reclength += null_pack_length;
1704 
1705   share->reclength = reclength;
1706   share->null_bytes = null_pack_length;
1707   share->null_fields = null_count;
1708 
1709   if (alloc_record_buffers(table)) goto err;
1710   setup_tmp_table_column_bitmaps(table, bitmaps);
1711 
1712   null_flags = table->record[0];
1713 
1714   pos = table->record[0] + null_pack_length;
1715   null_count = 1;
1716   for (i = 0, reg_field = table->field; i < share->fields; i++, reg_field++) {
1717     Field *field_r = *reg_field;
1718     uint length;
1719 
1720     relocate_field(field_r, pos, null_flags, &null_count);
1721     length = field_r->pack_length();
1722     pos += length;
1723 
1724     // fix table name in field entry
1725     field_r->table_name = &table->alias;
1726   }
1727 
1728   // Create a key over param->hash_field to enforce unique constraint
1729   if (unique_constraint_via_hash_field) {
1730     KEY *hash_key = keyinfo;
1731     KEY_PART_INFO *hash_kpi = key_part_info;
1732 
1733     share->keys = 1;
1734     table->key_info = share->key_info = hash_key;
1735     hash_key->table = table;
1736     hash_key->key_part = hash_kpi;
1737     hash_key->actual_flags = hash_key->flags = HA_NULL_ARE_EQUAL;
1738     hash_kpi->init_from_field(hash_field);
1739     hash_key->key_length = hash_kpi->store_length;
1740   } else {
1741     DBUG_PRINT("info", ("Creating group key in temporary table"));
1742     share->keys = 1;
1743     table->key_info = table->s->key_info = keyinfo;
1744     keyinfo->key_part = key_part_info;
1745     keyinfo->actual_flags = keyinfo->flags = HA_NOSAME;
1746     keyinfo->key_length = 0;
1747     {
1748       key_part_info->init_from_field(field);
1749       key_part_info->bin_cmp = true;
1750 
1751       key_field = field->new_key_field(thd->mem_root, table, group_buff);
1752       if (!key_field) goto err;
1753       key_part_info->key_part_flag |= HA_END_SPACE_ARE_EQUAL;  // todo need
1754                                                                // this?
1755       keyinfo->key_length += key_part_info->length;
1756     }
1757   }
1758   {
1759     table->key_info->user_defined_key_parts = 1;
1760     table->key_info->usable_key_parts = 1;
1761     table->key_info->actual_key_parts = table->key_info->user_defined_key_parts;
1762     share->key_parts = table->key_info->user_defined_key_parts;
1763     table->key_info->set_rec_per_key_array(nullptr, nullptr);
1764     table->key_info->algorithm = table->file->get_default_index_algorithm();
1765     table->key_info->set_in_memory_estimate(IN_MEMORY_ESTIMATE_UNKNOWN);
1766     table->key_info->name = "weedout_key";
1767   }
1768 
1769   if (thd->is_fatal_error())  // If end of memory
1770     goto err;
1771 
1772   set_real_row_type(table);
1773 
1774   if (instantiate_tmp_table(thd, table)) goto err;
1775 
1776   thd->mem_root = mem_root_save;
1777   return table;
1778 
1779 err:
1780   thd->mem_root = mem_root_save;
1781   table->file->ha_index_or_rnd_end();
1782   free_tmp_table(thd, table); /* purecov: inspected */
1783   return nullptr;             /* purecov: inspected */
1784 }
1785 
1786 /****************************************************************************/
1787 
1788 /**
1789   Create an, optionally reduced, TABLE object with properly set up Field list
1790   from a list of field definitions.
1791 
1792   @details
1793   When is_virtual arg is true:
1794     The created table doesn't have a table handler associated with
1795     it, has no keys, no group/distinct, no copy_funcs array.
1796     The sole purpose of this TABLE object is to use the power of Field
1797     class to read/write data to/from table->record[0]. Then one can store
1798     the record in any container (RB tree, hash, etc).
1799     The table is created in THD mem_root, so are the table's fields.
1800     Consequently, if you don't BLOB fields, you don't need to free it.
1801   When is_virtual is false:
1802     This function creates a normal tmp table out of fields' definitions,
1803     rather than from lst of items. This is the main difference with
1804     create_tmp_table. Also the table created here doesn't do grouping,
1805     doesn't have indexes and copy_funcs/fields. The purpose is to be able to
1806     create result table for table functions out of fields' definitions
1807     without need in intermediate list of items.
1808 
1809   @param thd         connection handle
1810   @param field_list  list of column definitions
1811   @param is_virtual  if true, then it's effectively only a record buffer
1812                        with wrapper, used e.g to store vars in SP
1813                      if false, then a normal table, which can hold
1814                        records, is created
1815   @param select_options options for non-virtual tmp table
1816   @param alias       table's alias
1817 
1818   @return
1819     0 if out of memory, TABLE object in case of success
1820 */
1821 
create_tmp_table_from_fields(THD * thd,List<Create_field> & field_list,bool is_virtual,ulonglong select_options,const char * alias)1822 TABLE *create_tmp_table_from_fields(THD *thd, List<Create_field> &field_list,
1823                                     bool is_virtual, ulonglong select_options,
1824                                     const char *alias) {
1825   uint field_count = field_list.elements;
1826   uint blob_count = 0;
1827   Field **reg_field;
1828   Create_field *cdef; /* column definition */
1829   uint record_length = 0;
1830   uint null_count = 0;   /* number of columns which may be null */
1831   uint null_pack_length; /* NULL representation array length */
1832   uint *blob_field;
1833   uchar *bitmaps;
1834   TABLE *table;
1835   TABLE_SHARE *share;
1836   MEM_ROOT own_root, *m_root;
1837   /*
1838     total_uneven_bit_length is uneven bit length for BIT fields
1839   */
1840   uint total_uneven_bit_length = 0;
1841 
1842   if (!is_virtual) {
1843     init_sql_alloc(key_memory_TABLE, &own_root, TABLE_ALLOC_BLOCK_SIZE, 0);
1844     m_root = &own_root;
1845   } else
1846     m_root = thd->mem_root;
1847 
1848   if (!multi_alloc_root(m_root, &table, sizeof(*table), &share, sizeof(*share),
1849                         &reg_field, (field_count + 1) * sizeof(Field *),
1850                         &blob_field, (field_count + 1) * sizeof(uint), &bitmaps,
1851                         bitmap_buffer_size(field_count) * 3, NullS))
1852     return nullptr;
1853 
1854   new (table) TABLE;
1855   new (share) TABLE_SHARE;
1856   table->init_tmp_table(thd, share, m_root, nullptr, alias, reg_field,
1857                         blob_field, is_virtual);
1858 
1859   /* Create all fields and calculate the total length of record */
1860   List_iterator_fast<Create_field> it(field_list);
1861   uint idx = 0;
1862   while ((cdef = it++)) {
1863     *reg_field =
1864         cdef->is_nullable
1865             ? make_field(*cdef, share, nullptr,
1866                          pointer_cast<uchar *>(const_cast<char *>("")), 1)
1867             : make_field(*cdef, share);
1868     if (!*reg_field) goto error;
1869     (*reg_field)->init(table);
1870     record_length += (*reg_field)->pack_length();
1871     if (!(*reg_field)->is_flag_set(NOT_NULL_FLAG)) null_count++;
1872     (*reg_field)->set_field_index(idx++);
1873     if ((*reg_field)->type() == MYSQL_TYPE_BIT)
1874       total_uneven_bit_length += (*reg_field)->field_length & 7;
1875 
1876     if ((*reg_field)->is_flag_set(BLOB_FLAG))
1877       share->blob_field[blob_count++] = (uint)(reg_field - table->field);
1878 
1879     reg_field++;
1880   }
1881   *reg_field = nullptr;              /* mark the end of the list */
1882   share->blob_field[blob_count] = 0; /* mark the end of the list */
1883   share->blob_fields = blob_count;
1884 
1885   null_pack_length = (null_count + total_uneven_bit_length + 7) / 8;
1886   share->reclength = record_length + null_pack_length;
1887   share->null_bytes = null_pack_length;
1888   share->null_fields = null_count;
1889   share->fields = field_count;
1890 
1891   if (is_virtual) {
1892     /*
1893       When the table is virtual, updates won't be done on the table and
1894       default values won't be stored. Thus no need to allocate buffers for
1895       that.
1896     */
1897     share->rec_buff_length = ALIGN_SIZE(share->reclength + 1);
1898     table->record[0] = (uchar *)thd->alloc(share->rec_buff_length);
1899     if (!table->record[0]) goto error;
1900     if (null_pack_length) {
1901       table->null_flags = table->record[0];
1902       memset(table->record[0], 255, null_pack_length);  // Set null fields
1903     }
1904   } else if (alloc_record_buffers(table))
1905     goto error;
1906 
1907   setup_tmp_table_column_bitmaps(table, bitmaps);
1908 
1909   {
1910     /* Set up field pointers */
1911     uchar *null_flags = table->record[0];
1912     uchar *pos = null_flags + share->null_bytes;
1913     uint null_counter = 0;
1914 
1915     for (reg_field = table->field; *reg_field; ++reg_field) {
1916       Field *field = *reg_field;
1917       relocate_field(field, pos, null_flags, &null_counter);
1918       pos += field->pack_length();
1919     }
1920   }
1921 
1922   if (is_virtual) return table;
1923 
1924   store_record(table, s->default_values);  // Make empty default record
1925 
1926   if (setup_tmp_table_handler(table, select_options, false, false)) goto error;
1927 
1928   return table;
1929 error:
1930   for (reg_field = table->field; *reg_field; ++reg_field) destroy(*reg_field);
1931   return nullptr;
1932 }
1933 
1934 /**
1935   Checks if disk storage engine should be used for temporary table.
1936 
1937   @param table            table to allocate SE for
1938   @param select_options   current select's options
1939   @param force_disk_table true <=> Use InnoDB
1940   @param mem_engine       Selected in-memory storage engine.
1941 
1942   @return
1943     true if disk storage engine should be used
1944     false if disk storage engine is not required
1945  */
use_tmp_disk_storage_engine(TABLE * table,ulonglong select_options,bool force_disk_table,enum_internal_tmp_mem_storage_engine mem_engine)1946 static bool use_tmp_disk_storage_engine(
1947     TABLE *table, ulonglong select_options, bool force_disk_table,
1948     enum_internal_tmp_mem_storage_engine mem_engine) {
1949   THD *thd = table->in_use;
1950   TABLE_SHARE *share = table->s;
1951 
1952   /* Caller needs SE to be disk-based (@see create_tmp_table()). */
1953   if (force_disk_table) {
1954     return true;
1955   }
1956 
1957   /*
1958     During bootstrap, the heap engine is not available, so we force using
1959     disk storage engine. This is especially hit when creating a I_S system
1960     view definition with a UNION in it AND is also when upgrading from
1961     older DD tables which involves execution of UPDATE queries to adjust
1962     metadata of DD tables.
1963   */
1964   if (opt_initialize || thd->is_dd_system_thread()) {
1965     return true;
1966   }
1967 
1968   if (mem_engine == TMP_TABLE_MEMORY) {
1969     /* MEMORY do not support BLOBs */
1970     if (share->blob_fields) {
1971       return true;
1972     }
1973   } else {
1974     DBUG_ASSERT(mem_engine == TMP_TABLE_TEMPTABLE);
1975   }
1976 
1977   /* User said the result would be big, so may not fit in memory */
1978   if ((thd->variables.big_tables) && !(select_options & SELECT_SMALL_RESULT)) {
1979     return true;
1980   }
1981 
1982   return false;
1983 }
1984 
1985 /**
1986   Helper function to create_tmp_table_* family for setting up table's SE
1987 
1988   @param table            table to allocate SE for
1989   @param select_options   current select's options
1990   @param force_disk_table true <=> Use InnoDB
1991   @param schema_table     whether the table is a schema table
1992 
1993   @returns
1994     false on success
1995     true  otherwise
1996 */
setup_tmp_table_handler(TABLE * table,ulonglong select_options,bool force_disk_table,bool schema_table)1997 static bool setup_tmp_table_handler(TABLE *table, ulonglong select_options,
1998                                     bool force_disk_table, bool schema_table) {
1999   THD *thd = table->in_use;
2000 
2001   TABLE_SHARE *share = table->s;
2002   enum_internal_tmp_mem_storage_engine mem_engine =
2003       static_cast<enum_internal_tmp_mem_storage_engine>(
2004           thd->variables.internal_tmp_mem_storage_engine);
2005 
2006   /* Except for special conditions, tmp table engine will be chosen by user. */
2007 
2008   /* For information_schema tables we use the Heap engine because we do
2009   not allow user-created TempTable tables and even though information_schema
2010   tables are not user-created, an ingenious user may execute:
2011   CREATE TABLE myowntemptabletable LIKE information_schema.some; */
2012   if (schema_table && (mem_engine == TMP_TABLE_TEMPTABLE)) {
2013     mem_engine = TMP_TABLE_MEMORY;
2014   }
2015 
2016   if (use_tmp_disk_storage_engine(table, select_options, force_disk_table,
2017                                   mem_engine)) {
2018     share->db_plugin = ha_lock_engine(nullptr, innodb_hton);
2019   } else {
2020     share->db_plugin = nullptr;
2021     switch (mem_engine) {
2022       case TMP_TABLE_TEMPTABLE:
2023         share->db_plugin = ha_lock_engine(nullptr, temptable_hton);
2024         break;
2025       case TMP_TABLE_MEMORY:
2026         share->db_plugin = ha_lock_engine(nullptr, heap_hton);
2027         break;
2028     }
2029     DBUG_ASSERT(share->db_plugin != nullptr);
2030   }
2031 
2032   if (!(table->file =
2033             get_new_handler(share, false, &share->mem_root, share->db_type())))
2034     return true;
2035 
2036   // Update the handler with information about the table object
2037   table->file->change_table_ptr(table, share);
2038   if (table->file->set_ha_share_ref(&share->ha_share)) {
2039     destroy(table->file);
2040     return true;
2041   }
2042 
2043   // Initialize cost model for this table
2044   table->init_cost_model(thd->cost_model());
2045 
2046   return false;
2047 }
2048 
2049 /**
2050   Helper function for create_tmp_table_* family for allocating record buffers
2051 
2052   @note Caller must initialize TABLE_SHARE::reclength and
2053   TABLE_SHARE::null_bytes before calling this function.
2054 
2055   @param table  table to allocate record buffers for
2056 
2057   @returns
2058     false  on success
2059     true   otherwise
2060 */
2061 
alloc_record_buffers(TABLE * table)2062 static bool alloc_record_buffers(TABLE *table) {
2063   TABLE_SHARE *share = table->s;
2064   THD *thd = table->in_use;
2065   /*
2066     Same as MI_UNIQUE_HASH_LENGTH,
2067     allows to exclude "myisam.h" from include files.
2068   */
2069   const int TMP_TABLE_UNIQUE_HASH_LENGTH = 4;
2070   uint alloc_length =
2071       ALIGN_SIZE(share->reclength + TMP_TABLE_UNIQUE_HASH_LENGTH + 1);
2072   share->rec_buff_length = alloc_length;
2073   /*
2074     Note that code in open_table_from_share() relies on the fact that
2075     for optimizer-created temporary tables TABLE_SHARE::default_values
2076     is allocated in a single chuck with TABLE::record[0] for the first
2077     TABLE instance.
2078   */
2079   if (!(table->record[0] = (uchar *)share->mem_root.Alloc(
2080             (alloc_length * 3 + share->null_bytes))))
2081     return true;
2082   table->record[1] = table->record[0] + alloc_length;
2083   share->default_values = table->record[1] + alloc_length;
2084   table->null_flags_saved = share->default_values + alloc_length;
2085   if (share->null_bytes) {
2086     table->null_flags = table->record[0];
2087     memset(table->record[0], 255, share->null_bytes);  // Set null fields
2088   }
2089 
2090   if (thd->variables.tmp_table_size == ~(ulonglong)0)  // No limit
2091     share->max_rows = ~(ha_rows)0;
2092   else
2093     share->max_rows = (ha_rows)(((share->db_type() == heap_hton)
2094                                      ? min(thd->variables.tmp_table_size,
2095                                            thd->variables.max_heap_table_size)
2096                                      : thd->variables.tmp_table_size) /
2097                                 share->reclength);
2098   share->max_rows =
2099       std::max(share->max_rows, ha_rows(1));  // For dummy start options
2100 
2101   return false;
2102 }
2103 
open_tmp_table(TABLE * table)2104 bool open_tmp_table(TABLE *table) {
2105   DBUG_ASSERT(table->s->ref_count() == 1 ||        // not shared, or:
2106               table->s->db_type() == heap_hton ||  // using right engines
2107               table->s->db_type() == temptable_hton ||
2108               table->s->db_type() == innodb_hton);
2109 
2110   int error;
2111   if ((error = table->file->ha_open(table, table->s->table_name.str, O_RDWR,
2112                                     HA_OPEN_TMP_TABLE | HA_OPEN_INTERNAL_TABLE,
2113                                     nullptr))) {
2114     table->file->print_error(error, MYF(0)); /* purecov: inspected */
2115     table->db_stat = 0;
2116     return (true);
2117   }
2118   (void)table->file->ha_extra(HA_EXTRA_QUICK); /* Faster */
2119 
2120   table->set_created();
2121   table->s->tmp_handler_count++;
2122   return false;
2123 }
2124 
2125 /**
2126   Try to create an in-memory temporary table and if not enough space, then
2127   try to create an on-disk one.
2128 
2129   Create a temporary table according to passed description.
2130 
2131   The passed array or MI_COLUMNDEF structures must have this form:
2132 
2133     1. 1-byte column (afaiu for 'deleted' flag) (note maybe not 1-byte
2134        when there are many nullable columns)
2135     2. Table columns
2136     3. One free MI_COLUMNDEF element (*recinfo points here)
2137 
2138   This function may use the free element to create hash column for unique
2139   constraint.
2140 
2141   @param[in,out] table Table object that describes the table to be created
2142 
2143   @retval false OK
2144   @retval true Error
2145 */
create_tmp_table_with_fallback(TABLE * table)2146 static bool create_tmp_table_with_fallback(TABLE *table) {
2147   TABLE_SHARE *share = table->s;
2148 
2149   DBUG_TRACE;
2150 
2151   HA_CREATE_INFO create_info;
2152 
2153   create_info.db_type = table->s->db_type();
2154   create_info.row_type = table->s->row_type;
2155   create_info.options |=
2156       HA_LEX_CREATE_TMP_TABLE | HA_LEX_CREATE_INTERNAL_TMP_TABLE;
2157 
2158   /*
2159     INNODB's fixed length column size is restricted to 1024. Exceeding this can
2160     result in incorrect behavior.
2161   */
2162   if (table->s->db_type() == innodb_hton) {
2163     for (Field **field = table->field; *field; ++field) {
2164       if ((*field)->type() == MYSQL_TYPE_STRING &&
2165           (*field)->key_length() > 1024) {
2166         my_error(ER_TOO_LONG_KEY, MYF(0), 1024);
2167         return true;
2168       }
2169     }
2170   }
2171 
2172   int error =
2173       table->file->create(share->table_name.str, table, &create_info, nullptr);
2174   if (error == HA_ERR_RECORD_FILE_FULL &&
2175       table->s->db_type() == temptable_hton) {
2176     table->file =
2177         get_new_handler(table->s, false, &table->s->mem_root, innodb_hton);
2178     error = table->file->create(share->table_name.str, table, &create_info,
2179                                 nullptr);
2180   }
2181 
2182   if (error) {
2183     table->file->print_error(error, MYF(0)); /* purecov: inspected */
2184     table->db_stat = 0;
2185     return true;
2186   } else {
2187     if (table->s->db_type() != temptable_hton) {
2188       table->in_use->inc_status_created_tmp_disk_tables();
2189     }
2190     return false;
2191   }
2192 }
2193 
trace_tmp_table(Opt_trace_context * trace,const TABLE * table)2194 static void trace_tmp_table(Opt_trace_context *trace, const TABLE *table) {
2195   TABLE_SHARE *s = table->s;
2196   Opt_trace_object trace_tmp(trace, "tmp_table_info");
2197   if (strlen(table->alias) != 0)
2198     trace_tmp.add_utf8_table(table->pos_in_table_list);
2199   else
2200     trace_tmp.add_alnum("table", "intermediate_tmp_table");
2201   QEP_TAB *tab = table->reginfo.qep_tab;
2202   if (tab != nullptr && tab->join() != nullptr)
2203     trace_tmp.add("in_plan_at_position", tab->idx());
2204   trace_tmp.add("columns", s->fields)
2205       .add("row_length", s->reclength)
2206       .add("key_length", table->key_info ? table->key_info->key_length : 0)
2207       .add("unique_constraint", table->hash_field ? true : false)
2208       .add("makes_grouped_rows", table->group != nullptr)
2209       .add("cannot_insert_duplicates", table->is_distinct);
2210 
2211   if (s->db_type() == innodb_hton) {
2212     trace_tmp.add_alnum("location", "disk (InnoDB)");
2213     if (s->db_create_options & HA_OPTION_PACK_RECORD)
2214       trace_tmp.add_alnum("record_format", "packed");
2215     else
2216       trace_tmp.add_alnum("record_format", "fixed");
2217   } else if (table->s->db_type() == temptable_hton) {
2218     trace_tmp.add_alnum("location", "TempTable");
2219   } else {
2220     DBUG_ASSERT(s->db_type() == heap_hton);
2221     trace_tmp.add_alnum("location", "memory (heap)")
2222         .add("row_limit_estimate", s->max_rows);
2223   }
2224 }
2225 
2226 /**
2227   @brief
2228   Instantiates temporary table
2229 
2230   @param  thd             Thread handler
2231   @param  table           Table object that describes the table to be
2232                           instantiated
2233 
2234   @details
2235     Creates tmp table and opens it.
2236 
2237   @return
2238      false - OK
2239      true  - Error
2240 */
2241 
instantiate_tmp_table(THD * thd,TABLE * table)2242 bool instantiate_tmp_table(THD *thd, TABLE *table) {
2243   TABLE_SHARE *const share = table->s;
2244 #ifndef DBUG_OFF
2245   for (uint i = 0; i < share->fields; i++)
2246     DBUG_ASSERT(table->field[i]->gcol_info == nullptr &&
2247                 table->field[i]->stored_in_db);
2248 #endif
2249   thd->inc_status_created_tmp_tables();
2250 
2251   if (share->db_type() == temptable_hton) {
2252     if (create_tmp_table_with_fallback(table)) return true;
2253   } else if (share->db_type() == innodb_hton) {
2254     if (create_tmp_table_with_fallback(table)) return true;
2255     // Make empty record so random data is not written to disk
2256     empty_record(table);
2257   }
2258 
2259   // If a heap table, it's created by open_tmp_table().
2260   if (open_tmp_table(table)) {
2261     /*
2262       Delete table immediately if we fail to open it, so
2263       TABLE::is_created() also implies that table is open.
2264     */
2265     table->file->ha_delete_table(share->table_name.str,
2266                                  nullptr); /* purecov: inspected */
2267     return true;
2268   }
2269 
2270   if (share->first_unused_tmp_key < share->keys) {
2271     /*
2272       Some other clone of this materialized temporary table has defined
2273       "possible" keys; as we are here creating the table in the engine, we must
2274       decide here what to do with them: drop them now, or make them "existing"
2275       now. As the other clone assumes they will be available if the Optimizer
2276       chooses them, we make them existing.
2277     */
2278     share->find_first_unused_tmp_key(Key_map(share->keys));
2279   }
2280 
2281   Opt_trace_context *const trace = &thd->opt_trace;
2282   if (unlikely(trace->is_started())) {
2283     Opt_trace_object wrapper(trace);
2284     Opt_trace_object convert(trace, "creating_tmp_table");
2285     trace_tmp_table(trace, table);
2286   }
2287   return false;
2288 }
2289 
2290 /**
2291   Free TABLE object and release associated resources for
2292   internal temporary table.
2293 */
free_tmp_table(THD * thd,TABLE * entry)2294 void free_tmp_table(THD *thd, TABLE *entry) {
2295   const char *save_proc_info;
2296   DBUG_TRACE;
2297   DBUG_PRINT("enter", ("table: %s", entry->alias));
2298 
2299   save_proc_info = thd->proc_info;
2300   THD_STAGE_INFO(thd, stage_removing_tmp_table);
2301 
2302   filesort_free_buffers(entry, true);
2303 
2304   DBUG_ASSERT(entry->s->tmp_handler_count <= entry->s->ref_count());
2305 
2306   if (entry->is_created()) {
2307     DBUG_ASSERT(entry->s->tmp_handler_count >= 1);
2308     // Table is marked as created only if was successfully opened.
2309     if (--entry->s->tmp_handler_count)
2310       entry->file->ha_close();
2311     else  // no more open 'handler' objects
2312       entry->file->ha_drop_table(entry->s->table_name.str);
2313     entry->set_deleted();
2314   }
2315 
2316   destroy(entry->file);
2317   entry->file = nullptr;
2318 
2319   /* free blobs */
2320   for (Field **ptr = entry->field; *ptr; ptr++) (*ptr)->mem_free();
2321   free_io_cache(entry);
2322 
2323   DBUG_ASSERT(entry->mem_root.allocated_size() == 0);
2324 
2325   DBUG_ASSERT(entry->s->ref_count() >= 1);
2326   if (entry->s->decrement_ref_count() == 0)  // no more TABLE objects
2327   {
2328     plugin_unlock(nullptr, entry->s->db_plugin);
2329     /*
2330       In create_tmp_table(), the share's memroot is allocated inside own_root
2331       and is then made a copy of own_root, so it is inside its memory blocks,
2332       so as soon as we free a memory block the memroot becomes unreadable.
2333       So we need a copy to free it.
2334     */
2335     MEM_ROOT own_root = std::move(entry->s->mem_root);
2336     destroy(entry);
2337     free_root(&own_root, MYF(0));
2338   }
2339 
2340   thd_proc_info(thd, save_proc_info);
2341 }
2342 
2343 /**
2344   If a MEMORY table gets full, create a disk-based table and copy all rows
2345   to this.
2346 
2347   @param thd             THD reference
2348   @param wtable          Table reference being written to
2349   @param error           Reason why inserting into MEMORY table failed.
2350   @param ignore_last_dup If true, ignore duplicate key error for last
2351                          inserted key (see detailed description below).
2352   @param [out] is_duplicate if non-NULL and ignore_last_dup is true,
2353                          return true if last key was a duplicate,
2354                          and false otherwise.
2355 
2356   @details
2357     Function can be called with any error code, but only HA_ERR_RECORD_FILE_FULL
2358     will be handled, all other errors cause a fatal error to be thrown.
2359     The function creates a disk-based temporary table, copies all records
2360     from the MEMORY table into this new table, deletes the old table and
2361     switches to use the new table within the table handle.
2362     The function uses table->record[1] as a temporary buffer while copying.
2363 
2364     The function assumes that table->record[0] contains the row that caused
2365     the error when inserting into the MEMORY table (the "last row").
2366     After all existing rows have been copied to the new table, the last row
2367     is attempted to be inserted as well. If ignore_last_dup is true,
2368     this row can be a duplicate of an existing row without throwing an error.
2369     If is_duplicate is non-NULL, an indication of whether the last row was
2370     a duplicate is returned.
2371 
2372   @note that any index/scan access initialized on the MEMORY 'wtable' is not
2373   replicated to the on-disk table - it's the caller's responsibility.
2374   However, access initialized on other TABLEs, is replicated.
2375 
2376   If 'wtable' has other TABLE clones (example: a multi-referenced or a
2377   recursive CTE), we convert all clones; if an error happens during conversion
2378   of clone B after successfully converting clone A, clone A and B will exit
2379   from the function with a TABLE_SHARE corresponding to the pre-conversion
2380   table ("old" TABLE_SHARE). So A will be inconsistent (for example
2381   s->db_type() will say "MEMORY" while A->file will be a disk-based engine).
2382   However, as all callers bail out, it is reasonable to think that they won't
2383   be using the TABLE_SHARE except in free_tmp_table(); and free_tmp_table()
2384   only uses properties of TABLE_SHARE which are common to the old and new
2385   object (reference counts, MEM_ROOT), so that should work.
2386   Solutions to fix this cleanly:
2387   - allocate new TABLE_SHARE on heap instead of on stack, to be able to
2388   exit with two TABLE_SHAREs (drawback: more heap memory consumption, and need
2389   to verify all exit paths are safe),
2390   - close all TABLEs if error (but then callers and cleanup code may be
2391   surprised to find already-closed tables so they would need fixing).
2392   To lower the risk of error between A and B: we expect most errors will
2393   happen when copying rows (e.g. read or write errors); so we convert 'wtable'
2394   (which does the row copying) first; if it fails, the A-B situation is
2395   avoided and we can properly exit with the old TABLE_SHARE.
2396 
2397   @returns true if error.
2398 */
2399 
bool create_ondisk_from_heap(THD *thd, TABLE *wtable, int error,
                             bool ignore_last_dup, bool *is_duplicate) {
  // Error from the row-copy phase; when non-zero it is reported at
  // err_after_open.
  int write_err = 0;
#ifndef DBUG_OFF
  // Debug-only bookkeeping, used by the assertions further down.
  const uint initial_handler_count = wtable->s->tmp_handler_count;
  bool rows_on_disk = false;
#endif
  // Becomes true once the on-disk table has been created; TABLEs processed
  // afterwards only open it.
  bool table_on_disk = false;
  DBUG_TRACE;

  if (error != HA_ERR_RECORD_FILE_FULL) {
    /*
      We don't want this error to be converted to a warning, e.g. in case of
      INSERT IGNORE ... SELECT.
    */
    wtable->file->print_error(error, MYF(ME_FATALERROR));
    return true;
  }

  if (wtable->s->db_type() != heap_hton) {
    if (wtable->s->db_type() != temptable_hton || temptable_use_mmap) {
      /* Do not convert in-memory temporary tables to on-disk
      temporary tables if the storage engine is anything other
      than the temptable engine or if the user has set the variable
      temptable_use_mmap to true to use mmap'ed files for temporary
      tables. */
      wtable->file->print_error(error, MYF(ME_FATALERROR));
      return true;
    }

    /* If we are here, then the in-memory temporary tables need
    to be converted into on-disk temporary tables */
  }

  // Saved so that the stage text can be restored (or adjusted) on exit.
  const char *save_proc_info = thd->proc_info;
  THD_STAGE_INFO(thd, stage_converting_heap_to_ondisk);

  /*
    Work on a stack copy of the share: it is moved back into *old_share once
    all TABLEs have been converted; on error only its mem_root is handed
    back to *old_share.
  */
  TABLE_SHARE *const old_share = wtable->s;
  const plugin_ref old_plugin = old_share->db_plugin;
  TABLE_SHARE share = std::move(*old_share);
  DBUG_ASSERT(share.ha_share == nullptr);

  // Switch the working share to the InnoDB engine plugin.
  share.db_plugin = ha_lock_engine(thd, innodb_hton);

  TABLE_LIST *const wtable_list = wtable->pos_in_table_list;
  Derived_refs_iterator ref_it(wtable_list);

  /*
    For a CTE, reorder its tmp_tables so that 'wtable' comes first; the
    iterator is rewound afterwards so that the loop below starts from the
    beginning.
  */
  if (wtable_list) {
    Common_table_expr *cte = wtable_list->common_table_expr();
    if (cte) {
      int i = 0, found = -1;
      TABLE *t;
      while ((t = ref_it.get_next())) {
        if (t == wtable) {
          found = i;
          break;
        }
        ++i;
      }
      DBUG_ASSERT(found >= 0);
      if (found > 0)
        // 'wtable' is at position 'found', move it to 0 to convert it first
        std::swap(cte->tmp_tables[0], cte->tmp_tables[found]);
      ref_it.rewind();
    }
  }

  // 'new_table' is a scratch TABLE re-filled on every iteration; 'table' is
  // the TABLE currently being converted.
  TABLE new_table, *table = nullptr;

  while (true) {
    if (wtable_list)  // Possibly there are clones
    {
      table = ref_it.get_next();
      if (table == nullptr) break;
    } else  // No clones
    {
      if (table == wtable)  // Already processed
        break;
      table = wtable;
    }

    table->mem_root.Clear();

    // Set up a partial copy of the table.
    new_table.record[0] = table->record[0];
    new_table.record[1] = table->record[1];
    new_table.field = table->field;
    new_table.key_info = table->key_info;
    new_table.in_use = table->in_use;
    new_table.db_stat = table->db_stat;
    // NOTE(review): key_info was already copied a few lines above; this
    // repeated assignment is redundant.
    new_table.key_info = table->key_info;
    new_table.hash_field = table->hash_field;
    new_table.group = table->group;
    new_table.is_distinct = table->is_distinct;
    new_table.alias = table->alias;
    new_table.pos_in_table_list = table->pos_in_table_list;
    new_table.reginfo = table->reginfo;
    new_table.read_set = table->read_set;
    new_table.write_set = table->write_set;

    new_table.s = &share;  // New table points to new share

    // Allocate a handler for the new share's engine on the share's MEM_ROOT.
    if (!(new_table.file = get_new_handler(
              &share, false, &new_table.s->mem_root, new_table.s->db_type())))
      goto err_after_proc_info; /* purecov: inspected */
    if (new_table.file->set_ha_share_ref(&share.ha_share))
      goto err_after_alloc; /* purecov: inspected */

    /* Fix row type which might have changed with SE change. */
    set_real_row_type(&new_table);

    // Only the first TABLE processed creates the on-disk table.
    if (!table_on_disk) {
      if (create_tmp_table_with_fallback(&new_table))
        goto err_after_alloc; /* purecov: inspected */

      table_on_disk = true;
    }

    bool rec_ref_w_open_cursor = false, psi_batch_started = false;

    if (table->is_created()) {
      // Close it, drop it, and open a new one in the disk-based engine.

      if (open_tmp_table(&new_table))
        goto err_after_create; /* purecov: inspected */

      // Carry the "indexes disabled" state over to the new table.
      if (table->file->indexes_are_disabled())
        new_table.file->ha_disable_indexes(HA_KEY_SWITCH_ALL);

      if (table == wtable) {
        // The table receiving writes; migrate rows before closing/dropping.

        if (unlikely(thd->opt_trace.is_started())) {
          Opt_trace_context *trace = &thd->opt_trace;
          Opt_trace_object wrapper(trace);
          Opt_trace_object convert(trace, "converting_tmp_table_to_ondisk");
          DBUG_ASSERT(error == HA_ERR_RECORD_FILE_FULL);
          convert.add_alnum("cause", "memory_table_size_exceeded");
          trace_tmp_table(trace, &new_table);
        }

        // End any access in progress on the old table, then start a full
        // scan of it.
        table->file->ha_index_or_rnd_end();

        if ((write_err = table->file->ha_rnd_init(true))) {
          /* purecov: begin inspected */
          table->file->print_error(write_err, MYF(ME_FATALERROR));
          // Already reported above; reset so that err_after_open does not
          // report it a second time.
          write_err = 0;
          goto err_after_open;
          /* purecov: end */
        }

        if (table->no_rows) {
          new_table.file->ha_extra(HA_EXTRA_NO_ROWS);
          new_table.no_rows = true;
        }

        /*
          copy all old rows from heap table to on-disk table
          This is the only code that uses record[1] to read/write but this
          is safe as this is a temporary on-disk table without timestamp/
          autoincrement or partitioning.
        */
        while (!table->file->ha_rnd_next(new_table.record[1])) {
          write_err = new_table.file->ha_write_row(new_table.record[1]);
          DBUG_EXECUTE_IF("raise_error", write_err = HA_ERR_FOUND_DUPP_KEY;);
          if (write_err) goto err_after_open;
        }
        /* copy row that filled HEAP table */
        if ((write_err = new_table.file->ha_write_row(table->record[0]))) {
          // The failure is tolerated only if the engine classifies it as
          // ignorable and the caller asked to ignore a duplicate last row.
          if (!new_table.file->is_ignorable_error(write_err) ||
              !ignore_last_dup)
            goto err_after_open;
          if (is_duplicate) *is_duplicate = true;
        } else {
          if (is_duplicate) *is_duplicate = false;
        }

        (void)table->file->ha_rnd_end();
#ifndef DBUG_OFF
        rows_on_disk = true;
#endif
      }

      /* remove heap table and change to use on-disk table */

      // TODO(sgunders): Move this into MaterializeIterator when we remove the
      // pre-iterator executor.
      if (table->pos_in_table_list &&
          table->pos_in_table_list->is_recursive_reference() &&
          table->file->inited) {
        /*
          Due to the last condition, this is guaranteed to be a recursive
          reference belonging to the unit which 'wtable' materializes, and not
          to the unit of another non-recursive reference (indeed, this other
          reference will re-use the rows of 'wtable', i.e. not execute its
          unit).
          This reference has opened a cursor.
          In the 'tmp_tables' list, 'wtable' is always before such recursive
          reference, as setup_materialized_derived_tmp_table() runs before
          substitute_recursive_reference(). So, we know the disk-based rows
          already exist at this point.
        */
        DBUG_ASSERT(rows_on_disk);
        (void)table->file->ha_rnd_end();
        rec_ref_w_open_cursor = true;
      }

      // Remember whether PSI batch mode was active so that it can be
      // restarted on the new handler further down.
      psi_batch_started = table->file->end_psi_batch_mode_if_started();

      // Close the in-memory table
      if (table->s->db_type() == temptable_hton) {
        /* Drop the in-memory temptable.
        This code can execute only if mmap'ed temporary
        files were disabled using temptable_use_mmap variable */
        DBUG_ASSERT(temptable_use_mmap == false);
        table->file->ha_drop_table(table->s->table_name.str);
      } else {
        // Closing the MEMORY table drops it if its ref count is down to zero
        (void)table->file->ha_close();
      }
      // One handler on the old table is gone; open_tmp_table() above
      // presumably counted the new one on 'share' - confirm.
      share.tmp_handler_count--;
    }

    /*
      Replace the guts of the old table with the new one, although keeping
      most members.
    */
    destroy(table->file);
    table->s = new_table.s;
    table->file = new_table.file;
    table->db_stat = new_table.db_stat;
    table->in_use = new_table.in_use;
    table->no_rows = new_table.no_rows;
    table->record[0] = new_table.record[0];
    table->record[1] = new_table.record[1];
    table->mem_root = std::move(new_table.mem_root);

    /*
      Depending on if this TABLE clone is early/late in optimization, or in
      execution, it has a JOIN_TAB or a QEP_TAB or none.
    */
    QEP_TAB *qep_tab = table->reginfo.qep_tab;
    QEP_shared_owner *tab;
    if (qep_tab)
      tab = qep_tab;
    else
      tab = table->reginfo.join_tab;

    /* Update quick select, if any. */
    if (tab && tab->quick()) {
      DBUG_ASSERT(table->pos_in_table_list->uses_materialization());
      tab->quick()->set_handler(table->file);
    }

    // TODO(sgunders): Move this into MaterializeIterator when we remove the
    // pre-iterator executor.
    if (rec_ref_w_open_cursor) {
      /*
        The table just changed from MEMORY to INNODB. 'table' is a reader and
        had an open cursor to the MEMORY table. We closed the cursor, now need
        to open it to InnoDB and re-position it at the same row as before.
        Row positions (returned by handler::position()) are different in
        MEMORY and InnoDB - so the MEMORY row and InnoDB row have differing
        positions.
        We had read N rows of the MEMORY table, need to re-position our
        cursor after the same N rows in the InnoDB table.

        NOTE(review): no re-positioning is performed in this block; only PSI
        batch mode is restarted here. Presumably the cursor is re-opened
        elsewhere (cf. reposition_innodb_cursor()) - confirm.
      */
      if (psi_batch_started) table->file->start_psi_batch_mode();
    }

    // Point 'table' back to old_share; *old_share will be updated after loop.
    table->s = old_share;
    /*
      Update share-dependent pointers cached in 'table->file' and in
      read_set/write_set.
    */
    table->file->change_table_ptr(table, table->s);
    table->file->set_ha_share_ref(&table->s->ha_share);
    table->use_all_columns();

  }  // End of tables-processing loop

  /*
    Swap plugin references: release the one held for the old engine and
    re-lock the new engine's plugin with a null THD, like the unlock; then
    publish the converted share.
  */
  plugin_unlock(nullptr, old_plugin);
  share.db_plugin = my_plugin_lock(nullptr, &share.db_plugin);
  *old_share = std::move(share);

  /*
    Now old_share is new, and all TABLEs in Derived_refs_iterator point to
    it, and so do their table->file: everything is consistent.
  */

  DBUG_ASSERT(initial_handler_count == wtable->s->tmp_handler_count);

  // If the caller's stage was "Copying to tmp table", switch it to the
  // on-disk variant; otherwise restore the caller's stage unchanged.
  if (save_proc_info)
    thd_proc_info(thd, (!strcmp(save_proc_info, "Copying to tmp table")
                            ? "Copying to tmp table on disk"
                            : save_proc_info));
  return false;

  /*
    Error exits. The labels fall through into each other, so jumping to one
    of them also runs all the cleanup steps below it.
  */
err_after_open:
  // The new table is open: report a pending write error, end a scan still
  // active on the old table, and close the new table.
  if (write_err) {
    DBUG_PRINT("error", ("Got error: %d", write_err));
    new_table.file->print_error(write_err, MYF(0));
  }
  if (table->file->inited) (void)table->file->ha_rnd_end();
  (void)new_table.file->ha_close();
err_after_create:
  // The new table exists in the engine: delete it.
  new_table.file->ha_delete_table(new_table.s->table_name.str, nullptr);
err_after_alloc:
  // The new handler object was allocated: destroy it.
  destroy(new_table.file);
err_after_proc_info:
  // Restore the stage text saved on entry.
  thd_proc_info(thd, save_proc_info);
  // New share took control of old share mem_root; regain control:
  old_share->mem_root = std::move(share.mem_root);
  return true;
}
2716 
2717 /**
2718   Encode an InnoDB PK in 6 bytes, high-byte first; like
2719   InnoDB's dict_sys_write_row_id() does.
2720   @param rowid_bytes  where to store the result
2721   @param length       how many available bytes in rowid_bytes
2722   @param row_num      PK to encode
2723 */
encode_innodb_position(uchar * rowid_bytes,uint length MY_ATTRIBUTE ((unused)),ha_rows row_num)2724 void encode_innodb_position(uchar *rowid_bytes,
2725                             uint length MY_ATTRIBUTE((unused)),
2726                             ha_rows row_num) {
2727   DBUG_ASSERT(length == 6);
2728   for (int i = 0; i < 6; i++)
2729     rowid_bytes[i] = (uchar)(row_num >> ((5 - i) * 8));
2730 }
2731 
2732 /**
2733   Helper function for create_ondisk_from_heap().
2734 
2735   Our InnoDB on-disk intrinsic table uses an autogenerated
2736   auto-incrementing primary key:
2737   - first inserted row has pk=1 (see
2738   dict_table_get_next_table_sess_row_id()), second has pk=2, etc
2739   - ha_rnd_next uses a PK index scan so returns rows in PK order
2740   - position() returns the PK
2741   - ha_rnd_pos() takes the PK in input.
2742 
2743   @param table   table read by cursor
2744   @param row_num function should position on the row_num'th row in insertion
2745   order.
2746 */
reposition_innodb_cursor(TABLE * table,ha_rows row_num)2747 bool reposition_innodb_cursor(TABLE *table, ha_rows row_num) {
2748   DBUG_ASSERT(table->s->db_type() == innodb_hton);
2749   if (table->file->ha_rnd_init(false)) return true; /* purecov: inspected */
2750   // Per the explanation above, the wanted InnoDB row has PK=row_num.
2751   uchar rowid_bytes[6];
2752   encode_innodb_position(rowid_bytes, sizeof(rowid_bytes), row_num);
2753   /*
2754     Go to the row, and discard the row. That places the cursor at
2755     the same row as before the engine conversion, so that rnd_next() will
2756     read the (row_num+1)th row.
2757   */
2758   return table->file->ha_rnd_pos(table->record[0], rowid_bytes);
2759 }
2760