1 /* Copyright (C) 2000-2002 MySQL AB
2    Copyright (C) 2008 eBay, Inc
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; version 2 of the License.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA */
16 
17 /*
18   Implements various base dataspace-related functions - allocate, free, clear
19 */
20 
21 #include "heapdef.h"
22 
23 
24 /*
25   MySQL Heap tables keep data in arrays of fixed-size chunks.
26   These chunks are organized into two groups of HP_BLOCK structures:
27     - group1 contains indexes, with one HP_BLOCK per key
28       (part of HP_KEYDEF)
29     - group2 contains record data, with single HP_BLOCK
30       for all records, referenced by HP_SHARE.recordspace.block
31 
32   While columns used in index are usually small, other columns
33   in the table may need to accomodate larger data. Typically,
34   larger data is placed into VARCHAR or BLOB columns. With actual
35   sizes varying, Heap Engine has to support variable-sized records
36   in memory. Heap Engine implements the concept of dataspace
37   (HP_DATASPACE), which incorporates HP_BLOCK for the record data,
38   and adds more information for managing variable-sized records.
39 
40   Variable-size records are stored in multiple "chunks",
41   which means that a single record of data (database "row") can
42   consist of multiple chunks organized into one "set". HP_BLOCK
43   contains chunks. In variable-size format, one record
44   is represented as one or many chunks, depending on the actual
45   data, while in fixed-size mode, one record is always represented
46   as one chunk. The index structures would always point to the first
47   chunk in the chunkset.
48 
49   At the time of table creation, Heap Engine attempts to find out if
50   variable-size records are desired. A user can request
51   variable-size records by providing either row_type=dynamic or
52   key_block_size=NNN table create option. Heap Engine will check
53   whether key_block_size provides enough space in the first chunk
54   to keep all null bits and columns that are used in indexes.
55   If key_block_size is too small, table creation will be aborted
56   with an error. Heap Engine will revert to fixed-size allocation
57   mode if key_block_size provides no memory benefits (if the
58   fixed-size record would always be shorter then the first chunk
59   in the chunkset with the specified key_block_size).
60 
61   In order to improve index search performance, Heap Engine needs
62   to keep all null flags and all columns used as keys inside
63   the first chunk of a chunkset. In particular, this means that
64   all columns used as keys should be defined first in the table
65   creation SQL. The length of data used by null bits and key columns
66   is stored as fixed_data_length inside HP_SHARE. fixed_data_length
67   will extend past last key column if more fixed-length fields can
68   fit into the first chunk.
69 
70   Variable-size records are necessary only in the presence of
71   variable-size columns. Heap Engine will be looking for BLOB
72   columns or VARCHAR columns, which declare length of 32 or more. If
73   no such columns are found, table will be switched to fixed-size
74   format. You should always try to put such columns at the end of
75   the table definition.
76 
77   Whenever data is being inserted or updated in the table
78   Heap Engine will calculate how many chunks are necessary.
79   For insert operations, Heap Engine allocates new chunkset in
80   the recordspace. For update operations it will modify length of
81   the existing chunkset, unlinking unnecessary chunks at the end,
82   or allocating and adding more if larger length is necessary.
83 
84   When writing data to chunks or copying data back to record,
85   fixed-size columns are copied in their full format. VARCHARs and
86   BLOBs are copied based on their actual length. Any NULL values
87   after fixed_data_length are skipped.
88 
89   The allocation and contents of the actual chunks varies between
90   fixed and variable-size modes. Total chunk length is always
91   aligned to the next sizeof(uchar*). Here is the format of
92   fixed-size chunk:
93       uchar[] - sizeof=chunk_dataspace_length, but at least
94                 sizeof(uchar*) bytes. Keeps actual data or pointer
95                 to the next deleted chunk.
96                 chunk_dataspace_length equals to full record length
97       uchar   - status field (1 means "in use", 0 means "deleted")
98 
99   Variable-size chunk uses different format:
100       uchar[] - sizeof=chunk_dataspace_length, but at least
101                 sizeof(uchar*) bytes. Keeps actual data or pointer
102                 to the next deleted chunk.
103                 chunk_dataspace_length is set according to table
104                 setup (key_block_size)
105       uchar*  - pointer to the next chunk in this chunkset,
106                 or NULL for the last chunk
107       uchar   - status field (1 means "first", 0 means "deleted",
108                 2 means "linked")
109 
110   When allocating a new chunkset of N chunks, Heap Engine will try
111   to allocate chunks one-by-one, linking them as they become
112   allocated. Allocation of a single chunk will attempt to reuse
113   a deleted (freed) chunk. If no free chunks are available,
114   it will attempt to allocate a new area inside HP_BLOCK.
115   Freeing chunks will place them at the front of free list
116   referenced by del_link in HP_DATASPACE. The newly freed chunk
117   will contain reference to the previously freed chunk in its first
118   sizeof(uchar*) of the payload space.
119 
120   Here is open issues:
121     -  It is not very nice to require people to keep key columns
122        at the beginning of the table creation SQL. There are three
123        proposed resolutions:
124        a. Leave it as is. It's a reasonable limitation
125        b. Add new HA_KEEP_KEY_COLUMNS_TO_FRONT flag to handler.h and
126           make table.cpp align columns when it creates the table
127        c. Make HeapEngine reorder columns in the chunk data, so that
128           key columns go first. Add parallel HA_KEYSEG structures
129           to distinguish positions in record vs. positions in
130           the first chunk. Copy all data field-by-field rather than
131           using single memcpy unless DBA kept key columns to
132           the beginning.
133     -  heap_check_heap needs verify linked chunks, looking for
134        issues such as orphans, cycles, and bad links. However,
135        Heap Engine today does not do similar things even for
136        free list.
137     -  In a more sophisticated implementation, some space can
138        be saved even with all fixed-size columns if many of them
139        have NULL value, as long as these columns are not used
140        in indexes
141     -  In variable-size format status should be moved to lower
142        bits of the "next" pointer. Pointer is always aligned
143        to sizeof(byte*), which is at least 4, leaving 2 lower
144        bits free. This will save 8 bytes per chunk
145        on 64-bit platform.
146     -  As we do not want to modify FRM format or to add new SQL
147        keywords, KEY_BLOCK_SIZE option of "CREATE TABLE" is reused
148        to specify block size for Heap Engine tables.
149     -  since all key columns must fit in the first chunk, having keys
150        on BLOB columns is currently impossible. This limitation is
151        relatively easiy to remove in future.
152 */
153 
154 static uchar *hp_allocate_one_chunk(HP_DATASPACE *info);
155 
156 
157 /**
158   Clear a dataspace
159 
160   Frees memory and zeros-out any relevant counters in the dataspace
161 
162   @param  info  the dataspace to clear
163 */
164 
hp_clear_dataspace(HP_DATASPACE * info)165 void hp_clear_dataspace(HP_DATASPACE *info)
166 {
167   if (info->block.levels)
168   {
169     hp_free_level(&info->block,info->block.levels,info->block.root,
170                   (uchar *) 0);
171   }
172   info->block.levels= 0;
173   info->del_chunk_count= info->chunk_count= 0;
174   info->del_link= 0;
175   info->total_data_length= 0;
176 }
177 
178 
179 /**
180   Allocate or reallocate a chunkset in the dataspace
181 
182   Attempts to allocate a new chunkset or change the size of an existing chunkset
183 
184   @param  info            the hosting dataspace
185   @param  chunk_count     the number of chunks that we expect as the result
186   @param  existing_set    non-null value asks function to resize existing
187                           chunkset, return value would point to this set
188 
189   @return  Pointer to the first chunk in the new or updated chunkset, or NULL
190            if unsuccessful
191 */
192 
hp_allocate_variable_chunkset(HP_DATASPACE * info,uint chunk_count,uchar * existing_set)193 static uchar *hp_allocate_variable_chunkset(HP_DATASPACE *info,
194                                            uint chunk_count,
195                                            uchar *existing_set)
196 {
197   int alloc_count= chunk_count, i;
198   uchar *first_chunk= 0, *curr_chunk= 0, *prev_chunk= 0;
199   uchar  *last_existing_chunk= 0;
200 
201   assert(alloc_count);
202 
203   if (existing_set)
204   {
205     first_chunk= existing_set;
206 
207     curr_chunk= existing_set;
208     while (curr_chunk && alloc_count)
209     {
210       prev_chunk= curr_chunk;
211       curr_chunk= *((uchar **) (curr_chunk + info->offset_link));
212       alloc_count--;
213     }
214 
215     if (!alloc_count)
216     {
217       if (curr_chunk)
218       {
219         /*
220           We came through all chunks and there is more left, let's truncate the
221           list.
222         */
223         *((uchar **) (prev_chunk + info->offset_link))= NULL;
224         hp_free_chunks(info, curr_chunk);
225       }
226 
227       return first_chunk;
228     }
229 
230     last_existing_chunk= prev_chunk;
231   }
232 
233   /*
234     We can reach this point only if we're allocating new chunkset or more chunks
235     in existing set.
236   */
237 
238   for (i= 0; i < alloc_count; i++)
239   {
240     curr_chunk= hp_allocate_one_chunk(info);
241     if (!curr_chunk)
242     {
243       /* no space in the current block */
244 
245       if (last_existing_chunk)
246       {
247         /* Truncate whatever was added at the end of the existing chunkset */
248         prev_chunk= last_existing_chunk;
249         curr_chunk= *((uchar **)(prev_chunk + info->offset_link));
250         *((uchar **)(prev_chunk + info->offset_link))= NULL;
251         hp_free_chunks(info, curr_chunk);
252       }
253       else if (first_chunk)
254       {
255         /* free any chunks previously allocated */
256         hp_free_chunks(info, first_chunk);
257       }
258 
259       return NULL;
260     }
261 
262     /* mark as if this chunk is last in the chunkset */
263     *((uchar **) (curr_chunk + info->offset_link))= 0;
264 
265     if (prev_chunk)
266     {
267       /* tie them into a linked list */
268       *((uchar **) (prev_chunk + info->offset_link))= curr_chunk;
269       /* Record linked from active */
270       curr_chunk[info->offset_status]= CHUNK_STATUS_LINKED;
271     }
272     else
273     {
274       /* Record active */
275       curr_chunk[info->offset_status]= CHUNK_STATUS_ACTIVE;
276     }
277 
278     if (!first_chunk)
279     {
280       first_chunk= curr_chunk;
281     }
282 
283     prev_chunk= curr_chunk;
284 }
285 
286   return first_chunk;
287 }
288 
289 
290 /**
291   Allocate a new chunkset in the dataspace
292 
293   Attempts to allocate a new chunkset
294 
295   @param  info            the hosting dataspace
296   @param  chunk_count     the number of chunks that we expect as the result
297 
298   @return  Pointer to the first chunk in the new or updated chunkset, or NULL if
299            unsuccessful
300 */
301 
hp_allocate_chunkset(HP_DATASPACE * info,uint chunk_count)302 uchar *hp_allocate_chunkset(HP_DATASPACE *info, uint chunk_count)
303 {
304   uchar *result;
305 
306   DBUG_ENTER("hp_allocate_chunks");
307 
308   if (info->is_variable_size)
309   {
310     result = hp_allocate_variable_chunkset(info, chunk_count, NULL);
311   }
312   else
313   {
314     result= hp_allocate_one_chunk(info);
315     if (result)
316     {
317       result[info->offset_status]= CHUNK_STATUS_ACTIVE;
318     }
319 
320     DBUG_RETURN(result);
321   }
322 
323   DBUG_RETURN(result);
324 }
325 
326 
327 /**
328   Reallocate an existing chunkset in the dataspace
329 
330   Attempts to change the size of an existing chunkset
331 
332   @param  info            the hosting dataspace
333   @param  chunk_count     the number of chunks that we expect as the result
334   @param  pos             pointer to the existing chunkset
335 
336   @return  Error code or zero if successful
337 */
338 
hp_reallocate_chunkset(HP_DATASPACE * info,uint chunk_count,uchar * pos)339 int hp_reallocate_chunkset(HP_DATASPACE *info, uint chunk_count, uchar *pos)
340 {
341   DBUG_ENTER("hp_reallocate_chunks");
342 
343   if (!info->is_variable_size)
344   {
345     /* Update should never change chunk_count in fixed-size mode */
346     set_my_errno(HA_ERR_WRONG_COMMAND);
347     DBUG_RETURN(HA_ERR_WRONG_COMMAND);
348   }
349 
350   /* Reallocate never moves the first chunk */
351   if (!hp_allocate_variable_chunkset(info, chunk_count, pos))
352     DBUG_RETURN(my_errno());
353 
354   DBUG_RETURN(0);
355 }
356 
357 
358 /**
359   Allocate a single chunk in the dataspace
360 
361   Attempts to allocate a new chunk or reuse one from deleted list
362 
363   @param  info            the hosting dataspace
364 
365   @return  Pointer to the chunk, or NULL if unsuccessful
366 */
367 
hp_allocate_one_chunk(HP_DATASPACE * info)368 static uchar *hp_allocate_one_chunk(HP_DATASPACE *info)
369 {
370   uchar *curr_chunk;
371   size_t length;
372   ulong block_pos;
373 
374   if (info->del_link)
375   {
376     curr_chunk= info->del_link;
377     info->del_link= *((uchar **) curr_chunk);
378     info->del_chunk_count--;
379 
380     DBUG_PRINT("hp_allocate_one_chunk",
381                ("Used old position: 0x%lx",(long) curr_chunk));
382     return curr_chunk;
383   }
384 
385   block_pos= (info->chunk_count % info->block.records_in_block);
386   if (!block_pos)
387   {
388     if (hp_get_new_block(&info->block, &length))
389     {
390       /* no space in the current block */
391       return NULL;
392     }
393 
394     info->total_data_length+= length;
395   }
396 
397   info->chunk_count++;
398   curr_chunk= ((uchar *) info->block.level_info[0].last_blocks +
399                block_pos * info->block.recbuffer);
400 
401   DBUG_PRINT("hp_allocate_one_chunk",
402              ("Used new position: 0x%lx", (long) curr_chunk));
403 
404   return curr_chunk;
405 }
406 
407 
408 /**
409   Free a list of chunks
410 
411   Reclaims all chunks linked by the pointer,
412   which could be the whole chunkset or a part of an existing chunkset
413 
414   @param  info            the hosting dataspace
415   @param  pos             pointer to the head of the chunkset
416 */
417 
hp_free_chunks(HP_DATASPACE * info,uchar * pos)418 void hp_free_chunks(HP_DATASPACE *info, uchar *pos)
419 {
420   uchar *curr_chunk= pos;
421 
422   while (curr_chunk)
423   {
424     info->del_chunk_count++;
425     *((uchar **) curr_chunk)= info->del_link;
426     info->del_link= curr_chunk;
427 
428     curr_chunk[info->offset_status]= CHUNK_STATUS_DELETED;
429 
430     DBUG_PRINT("hp_free_chunks",("Freed position: 0x%lx", (long) curr_chunk));
431 
432     if (!info->is_variable_size)
433     {
434       break;
435     }
436 
437     /* Delete next chunk in this chunkset */
438     curr_chunk= *((uchar **)(curr_chunk + info->offset_link));
439   }
440 }
441