1 /* Copyright (c) 2000, 2021, Oracle and/or its affiliates.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
22 
23 #include "heapdef.h"
24 #include <mysql_com.h>
25 #include <mysqld_error.h>
26 
27 static int keys_compare(heap_rb_param *param, uchar *key1, uchar *key2);
28 static void init_block(HP_BLOCK *block,uint chunk_length, ulong min_records,
29 		       ulong max_records);
30 
31 #define FIXED_REC_OVERHEAD (sizeof(uchar))
32 #define VARIABLE_REC_OVERHEAD (sizeof(uchar **) + sizeof(uchar))
33 
34 /* Minimum size that a chunk can take, 12 bytes on 32bit, 24 bytes on 64bit */
35 #define VARIABLE_MIN_CHUNK_SIZE                                         \
36   ((sizeof(uchar **) + VARIABLE_REC_OVERHEAD + sizeof(uchar **) - 1) &  \
37    ~(sizeof(uchar **) - 1))
38 
39 /* Create a heap table */
40 
heap_create(const char * name,HP_CREATE_INFO * create_info,HP_SHARE ** res,my_bool * created_new_share)41 int heap_create(const char *name, HP_CREATE_INFO *create_info,
42                 HP_SHARE **res, my_bool *created_new_share)
43 {
44   uint i, j, key_segs, max_length, length;
45   HP_SHARE *share= 0;
46   HA_KEYSEG *keyseg;
47   HP_KEYDEF *keydef= create_info->keydef;
48   uint reclength= create_info->reclength;
49   uint keys= create_info->keys;
50   ulong min_records= create_info->min_records;
51   ulong max_records= create_info->max_records;
52   ulong max_rows_for_stated_memory;
53   DBUG_ENTER("heap_create");
54 
55   if (!create_info->internal_table)
56   {
57     mysql_mutex_lock(&THR_LOCK_heap);
58     share= hp_find_named_heap(name);
59     if (share && share->open_count == 0)
60     {
61       hp_free(share);
62       share= 0;
63     }
64   }
65   *created_new_share= (share == NULL);
66 
67   if (!share)
68   {
69     uint chunk_dataspace_length, chunk_length, is_variable_size;
70     uint fixed_data_length, fixed_column_count;
71     HP_KEYDEF *keyinfo;
72     DBUG_PRINT("info",("Initializing new table"));
73 
74     if (create_info->max_chunk_size)
75     {
76       uint configured_chunk_size= create_info->max_chunk_size;
77 
78       /* User requested variable-size records, let's see if they're possible */
79 
80       if (configured_chunk_size < create_info->fixed_data_size)
81       {
82         /*
83           The resulting chunk_size cannot be smaller than fixed data part
84           at the start of the first chunk which allows faster copying
85           with a single memcpy().
86         */
87         my_error(ER_CANT_USE_OPTION_HERE, MYF(0), "key_block_size");
88         goto err;
89       }
90 
91       if (reclength > configured_chunk_size + VARIABLE_REC_OVERHEAD ||
92 	  create_info->blobs > 0)
93       {
94         /*
95           Allow variable size only if we're saving some space, i.e.
96           if a fixed-size record would take more space than variable-size
97           one plus the variable-size overhead.
98           There has to be at least one field after indexed fields.
99           Note that NULL bits are already included in key_part_size.
100         */
101         is_variable_size= 1;
102         chunk_dataspace_length= configured_chunk_size;
103       }
104       else
105       {
106         /* max_chunk_size is near the full reclength, let's use fixed size */
107         is_variable_size= 0;
108         chunk_dataspace_length= reclength;
109       }
110     }
111     else if ((create_info->is_dynamic && reclength >
112               256 + VARIABLE_REC_OVERHEAD)
113              || create_info->blobs > 0)
114     {
115       /*
116         User asked for dynamic records - use 256 as the chunk size, if that
117         will may save some memory. Otherwise revert to fixed size format.
118       */
119       if ((create_info->fixed_data_size + VARIABLE_REC_OVERHEAD) > 256)
120         chunk_dataspace_length= create_info->fixed_data_size;
121       else
122         chunk_dataspace_length= 256 - VARIABLE_REC_OVERHEAD;
123 
124       is_variable_size= 1;
125     }
126     else
127     {
128       /*
129         If max_chunk_size is not specified, put the whole record in one chunk
130       */
131       is_variable_size= 0;
132       chunk_dataspace_length= reclength;
133     }
134 
135     if (is_variable_size)
136     {
137       uint has_variable_fields= 0;
138 
139       fixed_column_count= create_info->fixed_key_fieldnr;
140 
141       /* Check whether we have any BLOB columns before key data, as currently
142       this is not supported */
143       for (i= 0; i < fixed_column_count; i++)
144       {
145         HP_COLUMNDEF *column= create_info->columndef + i;
146         if (is_blob_column(column))
147         {
148           my_error(ER_TABLE_CANT_HANDLE_BLOB, MYF(0));
149           goto err;
150         }
151       }
152 
153       /* Check whether we have any variable size records past key data */
154       fixed_data_length= create_info->fixed_data_size;
155       assert(i == create_info->fixed_key_fieldnr);
156       for (; i < create_info->columns; i++)
157       {
158         HP_COLUMNDEF *column= create_info->columndef + i;
159 	if ((column->type == MYSQL_TYPE_VARCHAR &&
160 	     (column->length - column->length_bytes) >= 32) ||
161             is_blob_column(column))
162         {
163             /*
164               The field has to be either blob or >= 5.0.3 true VARCHAR
165               and have substantial length.
166               TODO: do we want to calculate minimum length?
167             */
168             has_variable_fields= 1;
169             break;
170         }
171 
172         if (has_variable_fields)
173         {
174           break;
175         }
176 
177         if ((column->offset + column->length) <= chunk_dataspace_length)
178         {
179           /* Still no variable-size columns, add one fixed-length */
180           fixed_column_count= i + 1;
181           fixed_data_length= column->offset + column->length;
182         }
183       }
184 
185       if (!has_variable_fields && create_info->blobs == 0)
186       {
187         /*
188           There is no need to use variable-size records without variable-size
189           columns.
190           Reset sizes if it's not variable size anymore.
191         */
192         is_variable_size= 0;
193         chunk_dataspace_length= reclength;
194         fixed_data_length= reclength;
195         fixed_column_count= create_info->columns;
196       }
197     }
198     else
199     {
200       fixed_data_length= reclength;
201       fixed_column_count= create_info->columns;
202     }
203 
204     /*
205       We store uchar* del_link inside the data area of deleted records,
206       so the data length should be at least sizeof(uchar*)
207     */
208     set_if_bigger(chunk_dataspace_length, sizeof (uchar **));
209 
210     if (is_variable_size)
211     {
212       chunk_length= chunk_dataspace_length + VARIABLE_REC_OVERHEAD;
213     }
214     else
215     {
216       chunk_length= chunk_dataspace_length + FIXED_REC_OVERHEAD;
217     }
218 
219     /* Align chunk length to the next pointer */
220     chunk_length= (uint) (chunk_length + sizeof(uchar **) - 1) &
221       ~(sizeof(uchar **) - 1);
222 
223     for (i= key_segs= max_length= 0, keyinfo= keydef; i < keys; i++, keyinfo++)
224     {
225       memset(&keyinfo->block, 0, sizeof(keyinfo->block));
226       memset(&keyinfo->rb_tree, 0, sizeof(keyinfo->rb_tree));
227       for (j= length= 0; j < keyinfo->keysegs; j++)
228       {
229 	length+= keyinfo->seg[j].length;
230 	if (keyinfo->seg[j].null_bit)
231 	{
232 	  length++;
233 	  if (!(keyinfo->flag & HA_NULL_ARE_EQUAL))
234 	    keyinfo->flag|= HA_NULL_PART_KEY;
235 	  if (keyinfo->algorithm == HA_KEY_ALG_BTREE)
236 	    keyinfo->rb_tree.size_of_element++;
237 	}
238 	switch (keyinfo->seg[j].type) {
239         case HA_KEYTYPE_VARBINARY1:
240         case HA_KEYTYPE_VARTEXT1:
241         case HA_KEYTYPE_VARBINARY2:
242         case HA_KEYTYPE_VARTEXT2:
243           /*
244             For BTREE algorithm, key length, greater than or equal
245             to 255, is packed on 3 bytes.
246           */
247           if(keyinfo->algorithm == HA_KEY_ALG_BTREE)
248             length += size_to_store_key_length(keyinfo->seg[j].length);
249           else
250             length += 2;
251           break;
252 	default:
253 	  break;
254 	}
255       }
256       keyinfo->length= length;
257       length+= keyinfo->rb_tree.size_of_element +
258 	       ((keyinfo->algorithm == HA_KEY_ALG_BTREE) ? sizeof(uchar*) : 0);
259       if (length > max_length)
260 	max_length= length;
261       key_segs+= keyinfo->keysegs;
262       if (keyinfo->algorithm == HA_KEY_ALG_BTREE)
263       {
264         key_segs++; /* additional HA_KEYTYPE_END segment */
265         if (keyinfo->flag & HA_VAR_LENGTH_KEY)
266           keyinfo->get_key_length= hp_rb_var_key_length;
267         else if (keyinfo->flag & HA_NULL_PART_KEY)
268           keyinfo->get_key_length= hp_rb_null_key_length;
269         else
270           keyinfo->get_key_length= hp_rb_key_length;
271       }
272     }
273     if (!(share= (HP_SHARE*) my_malloc(hp_key_memory_HP_SHARE,
274                                        (uint) sizeof(HP_SHARE)+
275 				       keys*sizeof(HP_KEYDEF)+
276                                        (create_info->columns *
277                                         sizeof(HP_COLUMNDEF)) +
278 				       key_segs*sizeof(HA_KEYSEG),
279 				       MYF(MY_ZEROFILL))))
280       goto err;
281 
282     /*
283       Max_records is used for estimating block sizes and for enforcement.
284       Calculate the very maximum number of rows (if everything was one chunk)
285       and then take either that value or configured max_records (pick smallest
286       one).
287     */
288     max_rows_for_stated_memory= (ha_rows) (create_info->max_table_size /
289                                            (create_info->keys_memory_size +
290                                             chunk_length));
291     max_records = ((max_records && max_records < max_rows_for_stated_memory) ?
292                    max_records : max_rows_for_stated_memory);
293 
294     share->column_defs= (HP_COLUMNDEF*) (share + 1);
295     memcpy(share->column_defs, create_info->columndef,
296            (size_t) (sizeof(create_info->columndef[0]) *
297                      create_info->columns));
298 
299     share->keydef= (HP_KEYDEF*) (share->column_defs + create_info->columns);
300     share->key_stat_version= 1;
301     keyseg= (HA_KEYSEG*) (share->keydef + keys);
302     init_block(&share->recordspace.block, chunk_length, min_records,
303                max_records);
304 	/* Fix keys */
305     memcpy(share->keydef, keydef, (size_t) (sizeof(keydef[0]) * keys));
306     for (i= 0, keyinfo= share->keydef; i < keys; i++, keyinfo++)
307     {
308       keyinfo->seg= keyseg;
309       memcpy(keyseg, keydef[i].seg,
310 	     (size_t) (sizeof(keyseg[0]) * keydef[i].keysegs));
311       keyseg+= keydef[i].keysegs;
312 
313       if (keydef[i].algorithm == HA_KEY_ALG_BTREE)
314       {
315 	/* additional HA_KEYTYPE_END keyseg */
316 	keyseg->type=     HA_KEYTYPE_END;
317 	keyseg->length=   sizeof(uchar*);
318 	keyseg->flag=     0;
319 	keyseg->null_bit= 0;
320 	keyseg++;
321 
322 	init_tree(&keyinfo->rb_tree, 0, 0, sizeof(uchar*),
323 		  (qsort_cmp2)keys_compare, 1, NULL, NULL);
324 	keyinfo->delete_key= hp_rb_delete_key;
325 	keyinfo->write_key= hp_rb_write_key;
326       }
327       else
328       {
329 	init_block(&keyinfo->block, sizeof(HASH_INFO), min_records,
330 		   max_records);
331 	keyinfo->delete_key= hp_delete_key;
332 	keyinfo->write_key= hp_write_key;
333         keyinfo->hash_buckets= 0;
334       }
335       if ((keyinfo->flag & HA_AUTO_KEY) && create_info->with_auto_increment)
336         share->auto_key= i + 1;
337     }
338     share->min_records= min_records;
339     share->max_records= max_records;
340     share->max_table_size= create_info->max_table_size;
341     share->index_length= 0;
342     share->blength= 1;
343     share->keys= keys;
344     share->max_key_length= max_length;
345     share->column_count= create_info->columns;
346     share->changed= 0;
347     share->auto_key= create_info->auto_key;
348     share->auto_key_type= create_info->auto_key_type;
349     share->auto_increment= create_info->auto_increment;
350     share->create_time= (long) time((time_t*) 0);
351 
352     share->fixed_data_length= fixed_data_length;
353     share->fixed_column_count= fixed_column_count;
354     share->blobs= create_info->blobs;
355 
356     share->recordspace.chunk_length= chunk_length;
357     share->recordspace.chunk_dataspace_length= chunk_dataspace_length;
358     share->recordspace.is_variable_size= is_variable_size;
359     share->recordspace.total_data_length= 0;
360 
361     if (is_variable_size) {
362       share->recordspace.offset_link= chunk_dataspace_length;
363       share->recordspace.offset_status= share->recordspace.offset_link +
364         sizeof(uchar **);
365     } else {
366       /* Make it likely to fail if anyone uses this offset */
367       share->recordspace.offset_link= 1 << 22;
368       share->recordspace.offset_status= chunk_dataspace_length;
369     }
370 
371     /* Must be allocated separately for rename to work */
372     if (!(share->name= my_strdup(hp_key_memory_HP_SHARE,
373                                  name, MYF(0))))
374     {
375       my_free(share);
376       goto err;
377     }
378     if (!create_info->internal_table)
379     {
380       /*
381         Do not initialize THR_LOCK object for internal temporary tables.
382         It is not needed for such tables. Calling thr_lock_init() can
383         cause scalability issues since it acquires global lock.
384       */
385       thr_lock_init(&share->lock);
386       share->open_list.data= (void*) share;
387       heap_share_list= list_add(heap_share_list,&share->open_list);
388     }
389     else
390       share->delete_on_close= 1;
391   }
392   if (!create_info->internal_table)
393   {
394     if (create_info->pin_share)
395       ++share->open_count;
396     mysql_mutex_unlock(&THR_LOCK_heap);
397   }
398 
399   *res= share;
400   DBUG_RETURN(0);
401 
402 err:
403   if (!create_info->internal_table)
404     mysql_mutex_unlock(&THR_LOCK_heap);
405   DBUG_RETURN(1);
406 } /* heap_create */
407 
408 
keys_compare(heap_rb_param * param,uchar * key1,uchar * key2)409 static int keys_compare(heap_rb_param *param, uchar *key1, uchar *key2)
410 {
411   uint not_used[2];
412   return ha_key_cmp(param->keyseg, key1, key2, param->key_length,
413 		    param->search_flag, not_used);
414 }
415 
init_block(HP_BLOCK * block,uint chunk_length,ulong min_records,ulong max_records)416 static void init_block(HP_BLOCK *block, uint chunk_length, ulong min_records,
417 		       ulong max_records)
418 {
419   uint i,recbuffer,records_in_block;
420 
421   max_records= MY_MAX(min_records, max_records);
422   if (!max_records)
423     max_records= 1000;			/* As good as quess as anything */
424   /*
425     We want to start each chunk at 8 bytes boundary, round recbuffer to the
426     next 8.
427   */
428   recbuffer= (uint) (chunk_length + sizeof(uchar**) - 1) &
429     ~(sizeof(uchar**) - 1);
430   records_in_block= max_records / 10;
431   if (records_in_block < 10 && max_records)
432     records_in_block= 10;
433   if (!records_in_block || (ulonglong) records_in_block * recbuffer >
434       (my_default_record_cache_size-sizeof(HP_PTRS)*HP_MAX_LEVELS))
435     records_in_block= (my_default_record_cache_size - sizeof(HP_PTRS) *
436 		      HP_MAX_LEVELS) / recbuffer + 1;
437   block->records_in_block= records_in_block;
438   block->recbuffer= recbuffer;
439   block->last_allocated= 0L;
440 
441   for (i= 0; i <= HP_MAX_LEVELS; i++)
442     block->level_info[i].records_under_level=
443       (!i ? 1 : i == 1 ? records_in_block :
444        HP_PTRS_IN_NOD * block->level_info[i - 1].records_under_level);
445 }
446 
447 
heap_try_free(HP_SHARE * share)448 static inline void heap_try_free(HP_SHARE *share)
449 {
450   if (share->open_count == 0)
451     hp_free(share);
452   else
453     share->delete_on_close= 1;
454 }
455 
456 
heap_delete_table(const char * name)457 int heap_delete_table(const char *name)
458 {
459   int result;
460   HP_SHARE *share;
461   DBUG_ENTER("heap_delete_table");
462 
463   mysql_mutex_lock(&THR_LOCK_heap);
464   if ((share= hp_find_named_heap(name)))
465   {
466     heap_try_free(share);
467     result= 0;
468   }
469   else
470   {
471     result= ENOENT;
472     set_my_errno(result);
473   }
474   mysql_mutex_unlock(&THR_LOCK_heap);
475   DBUG_RETURN(result);
476 }
477 
478 
heap_drop_table(HP_INFO * info)479 void heap_drop_table(HP_INFO *info)
480 {
481   DBUG_ENTER("heap_drop_table");
482   mysql_mutex_lock(&THR_LOCK_heap);
483   heap_try_free(info->s);
484   mysql_mutex_unlock(&THR_LOCK_heap);
485   DBUG_VOID_RETURN;
486 }
487 
488 
hp_free(HP_SHARE * share)489 void hp_free(HP_SHARE *share)
490 {
491   my_bool not_internal_table= (share->open_list.data != NULL);
492   if (not_internal_table)                    /* If not internal table */
493     heap_share_list= list_delete(heap_share_list, &share->open_list);
494   hp_clear(share);			/* Remove blocks from memory */
495   if (not_internal_table)
496     thr_lock_delete(&share->lock);
497   my_free(share->name);
498   my_free(share);
499   return;
500 }
501