1 /* Copyright (c) 2000, 2021, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23 #include "heapdef.h"
24 #include <mysql_com.h>
25 #include <mysqld_error.h>
26
27 static int keys_compare(heap_rb_param *param, uchar *key1, uchar *key2);
28 static void init_block(HP_BLOCK *block,uint chunk_length, ulong min_records,
29 ulong max_records);
30
31 #define FIXED_REC_OVERHEAD (sizeof(uchar))
32 #define VARIABLE_REC_OVERHEAD (sizeof(uchar **) + sizeof(uchar))
33
34 /* Minimum size that a chunk can take, 12 bytes on 32bit, 24 bytes on 64bit */
35 #define VARIABLE_MIN_CHUNK_SIZE \
36 ((sizeof(uchar **) + VARIABLE_REC_OVERHEAD + sizeof(uchar **) - 1) & \
37 ~(sizeof(uchar **) - 1))
38
39 /* Create a heap table */
40
heap_create(const char * name,HP_CREATE_INFO * create_info,HP_SHARE ** res,my_bool * created_new_share)41 int heap_create(const char *name, HP_CREATE_INFO *create_info,
42 HP_SHARE **res, my_bool *created_new_share)
43 {
44 uint i, j, key_segs, max_length, length;
45 HP_SHARE *share= 0;
46 HA_KEYSEG *keyseg;
47 HP_KEYDEF *keydef= create_info->keydef;
48 uint reclength= create_info->reclength;
49 uint keys= create_info->keys;
50 ulong min_records= create_info->min_records;
51 ulong max_records= create_info->max_records;
52 ulong max_rows_for_stated_memory;
53 DBUG_ENTER("heap_create");
54
55 if (!create_info->internal_table)
56 {
57 mysql_mutex_lock(&THR_LOCK_heap);
58 share= hp_find_named_heap(name);
59 if (share && share->open_count == 0)
60 {
61 hp_free(share);
62 share= 0;
63 }
64 }
65 *created_new_share= (share == NULL);
66
67 if (!share)
68 {
69 uint chunk_dataspace_length, chunk_length, is_variable_size;
70 uint fixed_data_length, fixed_column_count;
71 HP_KEYDEF *keyinfo;
72 DBUG_PRINT("info",("Initializing new table"));
73
74 if (create_info->max_chunk_size)
75 {
76 uint configured_chunk_size= create_info->max_chunk_size;
77
78 /* User requested variable-size records, let's see if they're possible */
79
80 if (configured_chunk_size < create_info->fixed_data_size)
81 {
82 /*
83 The resulting chunk_size cannot be smaller than fixed data part
84 at the start of the first chunk which allows faster copying
85 with a single memcpy().
86 */
87 my_error(ER_CANT_USE_OPTION_HERE, MYF(0), "key_block_size");
88 goto err;
89 }
90
91 if (reclength > configured_chunk_size + VARIABLE_REC_OVERHEAD ||
92 create_info->blobs > 0)
93 {
94 /*
95 Allow variable size only if we're saving some space, i.e.
96 if a fixed-size record would take more space than variable-size
97 one plus the variable-size overhead.
98 There has to be at least one field after indexed fields.
99 Note that NULL bits are already included in key_part_size.
100 */
101 is_variable_size= 1;
102 chunk_dataspace_length= configured_chunk_size;
103 }
104 else
105 {
106 /* max_chunk_size is near the full reclength, let's use fixed size */
107 is_variable_size= 0;
108 chunk_dataspace_length= reclength;
109 }
110 }
111 else if ((create_info->is_dynamic && reclength >
112 256 + VARIABLE_REC_OVERHEAD)
113 || create_info->blobs > 0)
114 {
115 /*
116 User asked for dynamic records - use 256 as the chunk size, if that
117 will may save some memory. Otherwise revert to fixed size format.
118 */
119 if ((create_info->fixed_data_size + VARIABLE_REC_OVERHEAD) > 256)
120 chunk_dataspace_length= create_info->fixed_data_size;
121 else
122 chunk_dataspace_length= 256 - VARIABLE_REC_OVERHEAD;
123
124 is_variable_size= 1;
125 }
126 else
127 {
128 /*
129 If max_chunk_size is not specified, put the whole record in one chunk
130 */
131 is_variable_size= 0;
132 chunk_dataspace_length= reclength;
133 }
134
135 if (is_variable_size)
136 {
137 uint has_variable_fields= 0;
138
139 fixed_column_count= create_info->fixed_key_fieldnr;
140
141 /* Check whether we have any BLOB columns before key data, as currently
142 this is not supported */
143 for (i= 0; i < fixed_column_count; i++)
144 {
145 HP_COLUMNDEF *column= create_info->columndef + i;
146 if (is_blob_column(column))
147 {
148 my_error(ER_TABLE_CANT_HANDLE_BLOB, MYF(0));
149 goto err;
150 }
151 }
152
153 /* Check whether we have any variable size records past key data */
154 fixed_data_length= create_info->fixed_data_size;
155 assert(i == create_info->fixed_key_fieldnr);
156 for (; i < create_info->columns; i++)
157 {
158 HP_COLUMNDEF *column= create_info->columndef + i;
159 if ((column->type == MYSQL_TYPE_VARCHAR &&
160 (column->length - column->length_bytes) >= 32) ||
161 is_blob_column(column))
162 {
163 /*
164 The field has to be either blob or >= 5.0.3 true VARCHAR
165 and have substantial length.
166 TODO: do we want to calculate minimum length?
167 */
168 has_variable_fields= 1;
169 break;
170 }
171
172 if (has_variable_fields)
173 {
174 break;
175 }
176
177 if ((column->offset + column->length) <= chunk_dataspace_length)
178 {
179 /* Still no variable-size columns, add one fixed-length */
180 fixed_column_count= i + 1;
181 fixed_data_length= column->offset + column->length;
182 }
183 }
184
185 if (!has_variable_fields && create_info->blobs == 0)
186 {
187 /*
188 There is no need to use variable-size records without variable-size
189 columns.
190 Reset sizes if it's not variable size anymore.
191 */
192 is_variable_size= 0;
193 chunk_dataspace_length= reclength;
194 fixed_data_length= reclength;
195 fixed_column_count= create_info->columns;
196 }
197 }
198 else
199 {
200 fixed_data_length= reclength;
201 fixed_column_count= create_info->columns;
202 }
203
204 /*
205 We store uchar* del_link inside the data area of deleted records,
206 so the data length should be at least sizeof(uchar*)
207 */
208 set_if_bigger(chunk_dataspace_length, sizeof (uchar **));
209
210 if (is_variable_size)
211 {
212 chunk_length= chunk_dataspace_length + VARIABLE_REC_OVERHEAD;
213 }
214 else
215 {
216 chunk_length= chunk_dataspace_length + FIXED_REC_OVERHEAD;
217 }
218
219 /* Align chunk length to the next pointer */
220 chunk_length= (uint) (chunk_length + sizeof(uchar **) - 1) &
221 ~(sizeof(uchar **) - 1);
222
223 for (i= key_segs= max_length= 0, keyinfo= keydef; i < keys; i++, keyinfo++)
224 {
225 memset(&keyinfo->block, 0, sizeof(keyinfo->block));
226 memset(&keyinfo->rb_tree, 0, sizeof(keyinfo->rb_tree));
227 for (j= length= 0; j < keyinfo->keysegs; j++)
228 {
229 length+= keyinfo->seg[j].length;
230 if (keyinfo->seg[j].null_bit)
231 {
232 length++;
233 if (!(keyinfo->flag & HA_NULL_ARE_EQUAL))
234 keyinfo->flag|= HA_NULL_PART_KEY;
235 if (keyinfo->algorithm == HA_KEY_ALG_BTREE)
236 keyinfo->rb_tree.size_of_element++;
237 }
238 switch (keyinfo->seg[j].type) {
239 case HA_KEYTYPE_VARBINARY1:
240 case HA_KEYTYPE_VARTEXT1:
241 case HA_KEYTYPE_VARBINARY2:
242 case HA_KEYTYPE_VARTEXT2:
243 /*
244 For BTREE algorithm, key length, greater than or equal
245 to 255, is packed on 3 bytes.
246 */
247 if(keyinfo->algorithm == HA_KEY_ALG_BTREE)
248 length += size_to_store_key_length(keyinfo->seg[j].length);
249 else
250 length += 2;
251 break;
252 default:
253 break;
254 }
255 }
256 keyinfo->length= length;
257 length+= keyinfo->rb_tree.size_of_element +
258 ((keyinfo->algorithm == HA_KEY_ALG_BTREE) ? sizeof(uchar*) : 0);
259 if (length > max_length)
260 max_length= length;
261 key_segs+= keyinfo->keysegs;
262 if (keyinfo->algorithm == HA_KEY_ALG_BTREE)
263 {
264 key_segs++; /* additional HA_KEYTYPE_END segment */
265 if (keyinfo->flag & HA_VAR_LENGTH_KEY)
266 keyinfo->get_key_length= hp_rb_var_key_length;
267 else if (keyinfo->flag & HA_NULL_PART_KEY)
268 keyinfo->get_key_length= hp_rb_null_key_length;
269 else
270 keyinfo->get_key_length= hp_rb_key_length;
271 }
272 }
273 if (!(share= (HP_SHARE*) my_malloc(hp_key_memory_HP_SHARE,
274 (uint) sizeof(HP_SHARE)+
275 keys*sizeof(HP_KEYDEF)+
276 (create_info->columns *
277 sizeof(HP_COLUMNDEF)) +
278 key_segs*sizeof(HA_KEYSEG),
279 MYF(MY_ZEROFILL))))
280 goto err;
281
282 /*
283 Max_records is used for estimating block sizes and for enforcement.
284 Calculate the very maximum number of rows (if everything was one chunk)
285 and then take either that value or configured max_records (pick smallest
286 one).
287 */
288 max_rows_for_stated_memory= (ha_rows) (create_info->max_table_size /
289 (create_info->keys_memory_size +
290 chunk_length));
291 max_records = ((max_records && max_records < max_rows_for_stated_memory) ?
292 max_records : max_rows_for_stated_memory);
293
294 share->column_defs= (HP_COLUMNDEF*) (share + 1);
295 memcpy(share->column_defs, create_info->columndef,
296 (size_t) (sizeof(create_info->columndef[0]) *
297 create_info->columns));
298
299 share->keydef= (HP_KEYDEF*) (share->column_defs + create_info->columns);
300 share->key_stat_version= 1;
301 keyseg= (HA_KEYSEG*) (share->keydef + keys);
302 init_block(&share->recordspace.block, chunk_length, min_records,
303 max_records);
304 /* Fix keys */
305 memcpy(share->keydef, keydef, (size_t) (sizeof(keydef[0]) * keys));
306 for (i= 0, keyinfo= share->keydef; i < keys; i++, keyinfo++)
307 {
308 keyinfo->seg= keyseg;
309 memcpy(keyseg, keydef[i].seg,
310 (size_t) (sizeof(keyseg[0]) * keydef[i].keysegs));
311 keyseg+= keydef[i].keysegs;
312
313 if (keydef[i].algorithm == HA_KEY_ALG_BTREE)
314 {
315 /* additional HA_KEYTYPE_END keyseg */
316 keyseg->type= HA_KEYTYPE_END;
317 keyseg->length= sizeof(uchar*);
318 keyseg->flag= 0;
319 keyseg->null_bit= 0;
320 keyseg++;
321
322 init_tree(&keyinfo->rb_tree, 0, 0, sizeof(uchar*),
323 (qsort_cmp2)keys_compare, 1, NULL, NULL);
324 keyinfo->delete_key= hp_rb_delete_key;
325 keyinfo->write_key= hp_rb_write_key;
326 }
327 else
328 {
329 init_block(&keyinfo->block, sizeof(HASH_INFO), min_records,
330 max_records);
331 keyinfo->delete_key= hp_delete_key;
332 keyinfo->write_key= hp_write_key;
333 keyinfo->hash_buckets= 0;
334 }
335 if ((keyinfo->flag & HA_AUTO_KEY) && create_info->with_auto_increment)
336 share->auto_key= i + 1;
337 }
338 share->min_records= min_records;
339 share->max_records= max_records;
340 share->max_table_size= create_info->max_table_size;
341 share->index_length= 0;
342 share->blength= 1;
343 share->keys= keys;
344 share->max_key_length= max_length;
345 share->column_count= create_info->columns;
346 share->changed= 0;
347 share->auto_key= create_info->auto_key;
348 share->auto_key_type= create_info->auto_key_type;
349 share->auto_increment= create_info->auto_increment;
350 share->create_time= (long) time((time_t*) 0);
351
352 share->fixed_data_length= fixed_data_length;
353 share->fixed_column_count= fixed_column_count;
354 share->blobs= create_info->blobs;
355
356 share->recordspace.chunk_length= chunk_length;
357 share->recordspace.chunk_dataspace_length= chunk_dataspace_length;
358 share->recordspace.is_variable_size= is_variable_size;
359 share->recordspace.total_data_length= 0;
360
361 if (is_variable_size) {
362 share->recordspace.offset_link= chunk_dataspace_length;
363 share->recordspace.offset_status= share->recordspace.offset_link +
364 sizeof(uchar **);
365 } else {
366 /* Make it likely to fail if anyone uses this offset */
367 share->recordspace.offset_link= 1 << 22;
368 share->recordspace.offset_status= chunk_dataspace_length;
369 }
370
371 /* Must be allocated separately for rename to work */
372 if (!(share->name= my_strdup(hp_key_memory_HP_SHARE,
373 name, MYF(0))))
374 {
375 my_free(share);
376 goto err;
377 }
378 if (!create_info->internal_table)
379 {
380 /*
381 Do not initialize THR_LOCK object for internal temporary tables.
382 It is not needed for such tables. Calling thr_lock_init() can
383 cause scalability issues since it acquires global lock.
384 */
385 thr_lock_init(&share->lock);
386 share->open_list.data= (void*) share;
387 heap_share_list= list_add(heap_share_list,&share->open_list);
388 }
389 else
390 share->delete_on_close= 1;
391 }
392 if (!create_info->internal_table)
393 {
394 if (create_info->pin_share)
395 ++share->open_count;
396 mysql_mutex_unlock(&THR_LOCK_heap);
397 }
398
399 *res= share;
400 DBUG_RETURN(0);
401
402 err:
403 if (!create_info->internal_table)
404 mysql_mutex_unlock(&THR_LOCK_heap);
405 DBUG_RETURN(1);
406 } /* heap_create */
407
408
keys_compare(heap_rb_param * param,uchar * key1,uchar * key2)409 static int keys_compare(heap_rb_param *param, uchar *key1, uchar *key2)
410 {
411 uint not_used[2];
412 return ha_key_cmp(param->keyseg, key1, key2, param->key_length,
413 param->search_flag, not_used);
414 }
415
init_block(HP_BLOCK * block,uint chunk_length,ulong min_records,ulong max_records)416 static void init_block(HP_BLOCK *block, uint chunk_length, ulong min_records,
417 ulong max_records)
418 {
419 uint i,recbuffer,records_in_block;
420
421 max_records= MY_MAX(min_records, max_records);
422 if (!max_records)
423 max_records= 1000; /* As good as quess as anything */
424 /*
425 We want to start each chunk at 8 bytes boundary, round recbuffer to the
426 next 8.
427 */
428 recbuffer= (uint) (chunk_length + sizeof(uchar**) - 1) &
429 ~(sizeof(uchar**) - 1);
430 records_in_block= max_records / 10;
431 if (records_in_block < 10 && max_records)
432 records_in_block= 10;
433 if (!records_in_block || (ulonglong) records_in_block * recbuffer >
434 (my_default_record_cache_size-sizeof(HP_PTRS)*HP_MAX_LEVELS))
435 records_in_block= (my_default_record_cache_size - sizeof(HP_PTRS) *
436 HP_MAX_LEVELS) / recbuffer + 1;
437 block->records_in_block= records_in_block;
438 block->recbuffer= recbuffer;
439 block->last_allocated= 0L;
440
441 for (i= 0; i <= HP_MAX_LEVELS; i++)
442 block->level_info[i].records_under_level=
443 (!i ? 1 : i == 1 ? records_in_block :
444 HP_PTRS_IN_NOD * block->level_info[i - 1].records_under_level);
445 }
446
447
heap_try_free(HP_SHARE * share)448 static inline void heap_try_free(HP_SHARE *share)
449 {
450 if (share->open_count == 0)
451 hp_free(share);
452 else
453 share->delete_on_close= 1;
454 }
455
456
heap_delete_table(const char * name)457 int heap_delete_table(const char *name)
458 {
459 int result;
460 HP_SHARE *share;
461 DBUG_ENTER("heap_delete_table");
462
463 mysql_mutex_lock(&THR_LOCK_heap);
464 if ((share= hp_find_named_heap(name)))
465 {
466 heap_try_free(share);
467 result= 0;
468 }
469 else
470 {
471 result= ENOENT;
472 set_my_errno(result);
473 }
474 mysql_mutex_unlock(&THR_LOCK_heap);
475 DBUG_RETURN(result);
476 }
477
478
heap_drop_table(HP_INFO * info)479 void heap_drop_table(HP_INFO *info)
480 {
481 DBUG_ENTER("heap_drop_table");
482 mysql_mutex_lock(&THR_LOCK_heap);
483 heap_try_free(info->s);
484 mysql_mutex_unlock(&THR_LOCK_heap);
485 DBUG_VOID_RETURN;
486 }
487
488
hp_free(HP_SHARE * share)489 void hp_free(HP_SHARE *share)
490 {
491 my_bool not_internal_table= (share->open_list.data != NULL);
492 if (not_internal_table) /* If not internal table */
493 heap_share_list= list_delete(heap_share_list, &share->open_list);
494 hp_clear(share); /* Remove blocks from memory */
495 if (not_internal_table)
496 thr_lock_delete(&share->lock);
497 my_free(share->name);
498 my_free(share);
499 return;
500 }
501