1 /* Copyright (C) 2000-2002 MySQL AB
2 Copyright (C) 2008 eBay, Inc
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */
16
17 /*
18 Implements various base dataspace-related functions - allocate, free, clear
19 */
20
21 #include "heapdef.h"
22
23
24 /*
25 MySQL Heap tables keep data in arrays of fixed-size chunks.
26 These chunks are organized into two groups of HP_BLOCK structures:
27 - group1 contains indexes, with one HP_BLOCK per key
28 (part of HP_KEYDEF)
29 - group2 contains record data, with single HP_BLOCK
30 for all records, referenced by HP_SHARE.recordspace.block
31
32 While columns used in index are usually small, other columns
33 in the table may need to accomodate larger data. Typically,
34 larger data is placed into VARCHAR or BLOB columns. With actual
35 sizes varying, Heap Engine has to support variable-sized records
36 in memory. Heap Engine implements the concept of dataspace
37 (HP_DATASPACE), which incorporates HP_BLOCK for the record data,
38 and adds more information for managing variable-sized records.
39
40 Variable-size records are stored in multiple "chunks",
41 which means that a single record of data (database "row") can
42 consist of multiple chunks organized into one "set". HP_BLOCK
43 contains chunks. In variable-size format, one record
44 is represented as one or many chunks, depending on the actual
45 data, while in fixed-size mode, one record is always represented
46 as one chunk. The index structures would always point to the first
47 chunk in the chunkset.
48
49 At the time of table creation, Heap Engine attempts to find out if
50 variable-size records are desired. A user can request
51 variable-size records by providing either row_type=dynamic or
52 key_block_size=NNN table create option. Heap Engine will check
53 whether key_block_size provides enough space in the first chunk
54 to keep all null bits and columns that are used in indexes.
55 If key_block_size is too small, table creation will be aborted
56 with an error. Heap Engine will revert to fixed-size allocation
57 mode if key_block_size provides no memory benefits (if the
58 fixed-size record would always be shorter then the first chunk
59 in the chunkset with the specified key_block_size).
60
61 In order to improve index search performance, Heap Engine needs
62 to keep all null flags and all columns used as keys inside
63 the first chunk of a chunkset. In particular, this means that
64 all columns used as keys should be defined first in the table
65 creation SQL. The length of data used by null bits and key columns
66 is stored as fixed_data_length inside HP_SHARE. fixed_data_length
67 will extend past last key column if more fixed-length fields can
68 fit into the first chunk.
69
70 Variable-size records are necessary only in the presence of
71 variable-size columns. Heap Engine will be looking for BLOB
72 columns or VARCHAR columns, which declare length of 32 or more. If
73 no such columns are found, table will be switched to fixed-size
74 format. You should always try to put such columns at the end of
75 the table definition.
76
77 Whenever data is being inserted or updated in the table
78 Heap Engine will calculate how many chunks are necessary.
79 For insert operations, Heap Engine allocates new chunkset in
80 the recordspace. For update operations it will modify length of
81 the existing chunkset, unlinking unnecessary chunks at the end,
82 or allocating and adding more if larger length is necessary.
83
84 When writing data to chunks or copying data back to record,
85 fixed-size columns are copied in their full format. VARCHARs and
86 BLOBs are copied based on their actual length. Any NULL values
87 after fixed_data_length are skipped.
88
89 The allocation and contents of the actual chunks varies between
90 fixed and variable-size modes. Total chunk length is always
91 aligned to the next sizeof(uchar*). Here is the format of
92 fixed-size chunk:
93 uchar[] - sizeof=chunk_dataspace_length, but at least
94 sizeof(uchar*) bytes. Keeps actual data or pointer
95 to the next deleted chunk.
96 chunk_dataspace_length equals to full record length
97 uchar - status field (1 means "in use", 0 means "deleted")
98
99 Variable-size chunk uses different format:
100 uchar[] - sizeof=chunk_dataspace_length, but at least
101 sizeof(uchar*) bytes. Keeps actual data or pointer
102 to the next deleted chunk.
103 chunk_dataspace_length is set according to table
104 setup (key_block_size)
105 uchar* - pointer to the next chunk in this chunkset,
106 or NULL for the last chunk
107 uchar - status field (1 means "first", 0 means "deleted",
108 2 means "linked")
109
110 When allocating a new chunkset of N chunks, Heap Engine will try
111 to allocate chunks one-by-one, linking them as they become
112 allocated. Allocation of a single chunk will attempt to reuse
113 a deleted (freed) chunk. If no free chunks are available,
114 it will attempt to allocate a new area inside HP_BLOCK.
115 Freeing chunks will place them at the front of free list
116 referenced by del_link in HP_DATASPACE. The newly freed chunk
117 will contain reference to the previously freed chunk in its first
118 sizeof(uchar*) of the payload space.
119
120 Here is open issues:
121 - It is not very nice to require people to keep key columns
122 at the beginning of the table creation SQL. There are three
123 proposed resolutions:
124 a. Leave it as is. It's a reasonable limitation
125 b. Add new HA_KEEP_KEY_COLUMNS_TO_FRONT flag to handler.h and
126 make table.cpp align columns when it creates the table
127 c. Make HeapEngine reorder columns in the chunk data, so that
128 key columns go first. Add parallel HA_KEYSEG structures
129 to distinguish positions in record vs. positions in
130 the first chunk. Copy all data field-by-field rather than
131 using single memcpy unless DBA kept key columns to
132 the beginning.
133 - heap_check_heap needs verify linked chunks, looking for
134 issues such as orphans, cycles, and bad links. However,
135 Heap Engine today does not do similar things even for
136 free list.
137 - In a more sophisticated implementation, some space can
138 be saved even with all fixed-size columns if many of them
139 have NULL value, as long as these columns are not used
140 in indexes
141 - In variable-size format status should be moved to lower
142 bits of the "next" pointer. Pointer is always aligned
143 to sizeof(byte*), which is at least 4, leaving 2 lower
144 bits free. This will save 8 bytes per chunk
145 on 64-bit platform.
146 - As we do not want to modify FRM format or to add new SQL
147 keywords, KEY_BLOCK_SIZE option of "CREATE TABLE" is reused
148 to specify block size for Heap Engine tables.
149 - since all key columns must fit in the first chunk, having keys
150 on BLOB columns is currently impossible. This limitation is
151 relatively easiy to remove in future.
152 */
153
154 static uchar *hp_allocate_one_chunk(HP_DATASPACE *info);
155
156
157 /**
158 Clear a dataspace
159
160 Frees memory and zeros-out any relevant counters in the dataspace
161
162 @param info the dataspace to clear
163 */
164
hp_clear_dataspace(HP_DATASPACE * info)165 void hp_clear_dataspace(HP_DATASPACE *info)
166 {
167 if (info->block.levels)
168 {
169 hp_free_level(&info->block,info->block.levels,info->block.root,
170 (uchar *) 0);
171 }
172 info->block.levels= 0;
173 info->del_chunk_count= info->chunk_count= 0;
174 info->del_link= 0;
175 info->total_data_length= 0;
176 }
177
178
179 /**
180 Allocate or reallocate a chunkset in the dataspace
181
182 Attempts to allocate a new chunkset or change the size of an existing chunkset
183
184 @param info the hosting dataspace
185 @param chunk_count the number of chunks that we expect as the result
186 @param existing_set non-null value asks function to resize existing
187 chunkset, return value would point to this set
188
189 @return Pointer to the first chunk in the new or updated chunkset, or NULL
190 if unsuccessful
191 */
192
hp_allocate_variable_chunkset(HP_DATASPACE * info,uint chunk_count,uchar * existing_set)193 static uchar *hp_allocate_variable_chunkset(HP_DATASPACE *info,
194 uint chunk_count,
195 uchar *existing_set)
196 {
197 int alloc_count= chunk_count, i;
198 uchar *first_chunk= 0, *curr_chunk= 0, *prev_chunk= 0;
199 uchar *last_existing_chunk= 0;
200
201 DBUG_ASSERT(alloc_count);
202
203 if (existing_set)
204 {
205 first_chunk= existing_set;
206
207 curr_chunk= existing_set;
208 while (curr_chunk && alloc_count)
209 {
210 prev_chunk= curr_chunk;
211 curr_chunk= *((uchar **) (curr_chunk + info->offset_link));
212 alloc_count--;
213 }
214
215 if (!alloc_count)
216 {
217 if (curr_chunk)
218 {
219 /*
220 We came through all chunks and there is more left, let's truncate the
221 list.
222 */
223 *((uchar **) (prev_chunk + info->offset_link))= NULL;
224 hp_free_chunks(info, curr_chunk);
225 }
226
227 return first_chunk;
228 }
229
230 last_existing_chunk= prev_chunk;
231 }
232
233 /*
234 We can reach this point only if we're allocating new chunkset or more chunks
235 in existing set.
236 */
237
238 for (i= 0; i < alloc_count; i++)
239 {
240 curr_chunk= hp_allocate_one_chunk(info);
241 if (!curr_chunk)
242 {
243 /* no space in the current block */
244
245 if (last_existing_chunk)
246 {
247 /* Truncate whatever was added at the end of the existing chunkset */
248 prev_chunk= last_existing_chunk;
249 curr_chunk= *((uchar **)(prev_chunk + info->offset_link));
250 *((uchar **)(prev_chunk + info->offset_link))= NULL;
251 hp_free_chunks(info, curr_chunk);
252 }
253 else if (first_chunk)
254 {
255 /* free any chunks previously allocated */
256 hp_free_chunks(info, first_chunk);
257 }
258
259 return NULL;
260 }
261
262 /* mark as if this chunk is last in the chunkset */
263 *((uchar **) (curr_chunk + info->offset_link))= 0;
264
265 if (prev_chunk)
266 {
267 /* tie them into a linked list */
268 *((uchar **) (prev_chunk + info->offset_link))= curr_chunk;
269 /* Record linked from active */
270 curr_chunk[info->offset_status]= CHUNK_STATUS_LINKED;
271 }
272 else
273 {
274 /* Record active */
275 curr_chunk[info->offset_status]= CHUNK_STATUS_ACTIVE;
276 }
277
278 if (!first_chunk)
279 {
280 first_chunk= curr_chunk;
281 }
282
283 prev_chunk= curr_chunk;
284 }
285
286 return first_chunk;
287 }
288
289
290 /**
291 Allocate a new chunkset in the dataspace
292
293 Attempts to allocate a new chunkset
294
295 @param info the hosting dataspace
296 @param chunk_count the number of chunks that we expect as the result
297
298 @return Pointer to the first chunk in the new or updated chunkset, or NULL if
299 unsuccessful
300 */
301
hp_allocate_chunkset(HP_DATASPACE * info,uint chunk_count)302 uchar *hp_allocate_chunkset(HP_DATASPACE *info, uint chunk_count)
303 {
304 uchar *result;
305
306 DBUG_ENTER("hp_allocate_chunks");
307
308 if (info->is_variable_size)
309 {
310 result = hp_allocate_variable_chunkset(info, chunk_count, NULL);
311 }
312 else
313 {
314 result= hp_allocate_one_chunk(info);
315 if (result)
316 {
317 result[info->offset_status]= CHUNK_STATUS_ACTIVE;
318 }
319
320 DBUG_RETURN(result);
321 }
322
323 DBUG_RETURN(result);
324 }
325
326
327 /**
328 Reallocate an existing chunkset in the dataspace
329
330 Attempts to change the size of an existing chunkset
331
332 @param info the hosting dataspace
333 @param chunk_count the number of chunks that we expect as the result
334 @param pos pointer to the existing chunkset
335
336 @return Error code or zero if successful
337 */
338
hp_reallocate_chunkset(HP_DATASPACE * info,uint chunk_count,uchar * pos)339 int hp_reallocate_chunkset(HP_DATASPACE *info, uint chunk_count, uchar *pos)
340 {
341 DBUG_ENTER("hp_reallocate_chunks");
342
343 if (!info->is_variable_size)
344 {
345 /* Update should never change chunk_count in fixed-size mode */
346 my_errno= HA_ERR_WRONG_COMMAND;
347 DBUG_RETURN(my_errno);
348 }
349
350 /* Reallocate never moves the first chunk */
351 if (!hp_allocate_variable_chunkset(info, chunk_count, pos))
352 DBUG_RETURN(my_errno);
353
354 DBUG_RETURN(0);
355 }
356
357
358 /**
359 Allocate a single chunk in the dataspace
360
361 Attempts to allocate a new chunk or reuse one from deleted list
362
363 @param info the hosting dataspace
364
365 @return Pointer to the chunk, or NULL if unsuccessful
366 */
367
hp_allocate_one_chunk(HP_DATASPACE * info)368 static uchar *hp_allocate_one_chunk(HP_DATASPACE *info)
369 {
370 uchar *curr_chunk;
371 size_t length;
372 ulong block_pos;
373
374 if (info->del_link)
375 {
376 curr_chunk= info->del_link;
377 info->del_link= *((uchar **) curr_chunk);
378 info->del_chunk_count--;
379
380 DBUG_PRINT("hp_allocate_one_chunk",
381 ("Used old position: 0x%lx",(long) curr_chunk));
382 return curr_chunk;
383 }
384
385 block_pos= (info->chunk_count % info->block.records_in_block);
386 if (!block_pos)
387 {
388 if (hp_get_new_block(&info->block, &length))
389 {
390 /* no space in the current block */
391 return NULL;
392 }
393
394 info->total_data_length+= length;
395 }
396
397 info->chunk_count++;
398 curr_chunk= ((uchar *) info->block.level_info[0].last_blocks +
399 block_pos * info->block.recbuffer);
400
401 DBUG_PRINT("hp_allocate_one_chunk",
402 ("Used new position: 0x%lx", (long) curr_chunk));
403
404 return curr_chunk;
405 }
406
407
408 /**
409 Free a list of chunks
410
411 Reclaims all chunks linked by the pointer,
412 which could be the whole chunkset or a part of an existing chunkset
413
414 @param info the hosting dataspace
415 @param pos pointer to the head of the chunkset
416 */
417
hp_free_chunks(HP_DATASPACE * info,uchar * pos)418 void hp_free_chunks(HP_DATASPACE *info, uchar *pos)
419 {
420 uchar *curr_chunk= pos;
421
422 while (curr_chunk)
423 {
424 info->del_chunk_count++;
425 *((uchar **) curr_chunk)= info->del_link;
426 info->del_link= curr_chunk;
427
428 curr_chunk[info->offset_status]= CHUNK_STATUS_DELETED;
429
430 DBUG_PRINT("hp_free_chunks",("Freed position: 0x%lx", (long) curr_chunk));
431
432 if (!info->is_variable_size)
433 {
434 break;
435 }
436
437 /* Delete next chunk in this chunkset */
438 curr_chunk= *((uchar **)(curr_chunk + info->offset_link));
439 }
440 }
441