1 /* Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
22 
23 /*
24   Functions to handle space-packed-records and blobs
25 
26   A row may be stored in one or more linked blocks.
27   The block size is between MI_MIN_BLOCK_LENGTH and MI_MAX_BLOCK_LENGTH.
28   Each block is aligned on MI_DYN_ALIGN_SIZE.
  The reason for the max block size is to not have too many different types
  of blocks.  For the different block types, look at _mi_get_block_info()
31 */
32 
33 #include "my_config.h"
34 
35 #include <fcntl.h>
36 #include <sys/types.h>
37 
38 #include <algorithm>
39 
40 #include "my_byteorder.h"
41 #include "my_compiler.h"
42 #include "my_dbug.h"
43 #include "my_inttypes.h"
44 #include "my_io.h"
45 #include "my_macros.h"
46 #include "my_pointer_arithmetic.h"
47 #include "sql/field.h"
48 #include "storage/myisam/myisam_sys.h"
49 #include "storage/myisam/myisamdef.h"
50 
/*
  Buffer of 16 zero bytes.
  Enough for comparing if number is zero
*/
static char zero_string[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};

/*
  Prototypes for file-local helpers, declared here so that they can be
  called before the point where they are defined.
*/

/* Write record to data-file */
static int write_dynamic_record(MI_INFO *info, uchar *record, ulong reclength);
/* Get a block for data; the block returned through filepos/length must be used */
static int _mi_find_writepos(MI_INFO *info, ulong reclength, my_off_t *filepos,
                             ulong *length);
/* Update the record that starts at filepos */
static int update_dynamic_record(MI_INFO *info, my_off_t filepos, uchar *record,
                                 ulong reclength);
/* Delete the record that starts at filepos */
static int delete_dynamic_record(MI_INFO *info, my_off_t filepos,
                                 uint second_read);
/*
  NOTE(review): the definition is not visible in this part of the file;
  judging by the signature it compares 'length' bytes of 'buff' with the
  file content at 'filepos' -- confirm against the definition.
*/
static int _mi_cmp_buffer(File file, const uchar *buff, my_off_t filepos,
                          uint length);
63 
64 /* Interface function from MI_INFO */
65 
66 /*
67   Create mmaped area for MyISAM handler
68 
69   SYNOPSIS
70     mi_dynmap_file()
71     info		MyISAM handler
72 
73   RETURN
74     0  ok
75     1  error.
76 */
77 
mi_dynmap_file(MI_INFO * info,my_off_t size)78 bool mi_dynmap_file(MI_INFO *info, my_off_t size) {
79   DBUG_TRACE;
80   if (size == 0 || size > (my_off_t)(~((size_t)0))) {
81     if (size)
82       DBUG_PRINT("warning", ("File is too large for mmap"));
83     else
84       DBUG_PRINT("warning", ("Do not mmap zero-length"));
85     return true;
86   }
87   /*
88     I wonder if it is good to use MAP_NORESERVE. From the Linux man page:
89     MAP_NORESERVE
90       Do not reserve swap space for this mapping. When swap space is
91       reserved, one has the guarantee that it is possible to modify the
92       mapping. When swap space is not reserved one might get SIGSEGV
93       upon a write if no physical memory is available.
94   */
95   info->s->file_map = (uchar *)my_mmap(
96       nullptr, (size_t)size,
97       info->s->mode == O_RDONLY ? PROT_READ : PROT_READ | PROT_WRITE,
98       MAP_SHARED | MAP_NORESERVE, info->dfile, 0L);
99   if (info->s->file_map == (uchar *)MAP_FAILED) {
100     info->s->file_map = nullptr;
101     return true;
102   }
103 #if defined(HAVE_MADVISE)
104   madvise((char *)info->s->file_map, size, MADV_RANDOM);
105 #endif
106   info->s->mmaped_length = size;
107   info->s->file_read = mi_mmap_pread;
108   info->s->file_write = mi_mmap_pwrite;
109   return false;
110 }
111 
112 /*
113   Destroy mmaped area for MyISAM handler
114 
115   SYNOPSIS
116     mi_munmap_file()
117     info                  MyISAM handler
118 
119   RETURN
120     0  ok
121    !0  error.
122 */
123 
mi_munmap_file(MI_INFO * info)124 int mi_munmap_file(MI_INFO *info) {
125   int ret;
126   DBUG_TRACE;
127   if ((ret = my_munmap((void *)info->s->file_map,
128                        (size_t)info->s->mmaped_length)))
129     return ret;
130   info->s->file_read = mi_nommap_pread;
131   info->s->file_write = mi_nommap_pwrite;
132   info->s->file_map = nullptr;
133   info->s->mmaped_length = 0;
134   return 0;
135 }
136 
137 /*
138   Resize mmaped area for MyISAM handler
139 
140   SYNOPSIS
141     mi_remap_file()
142     info		MyISAM handler
143 
144   RETURN
145 */
146 
mi_remap_file(MI_INFO * info,my_off_t size)147 void mi_remap_file(MI_INFO *info, my_off_t size) {
148   if (info->s->file_map) {
149     mi_munmap_file(info);
150     mi_dynmap_file(info, size);
151   }
152 }
153 
154 /*
155   Read bytes from MySAM handler, using mmap or pread
156 
157   SYNOPSIS
158     mi_mmap_pread()
159     info		MyISAM handler
160     Buffer              Input buffer
161     Count               Count of bytes for read
162     offset              Start position
163     MyFlags
164 
165   RETURN
166     0  ok
167 */
168 
mi_mmap_pread(MI_INFO * info,uchar * Buffer,size_t Count,my_off_t offset,myf MyFlags)169 size_t mi_mmap_pread(MI_INFO *info, uchar *Buffer, size_t Count,
170                      my_off_t offset, myf MyFlags) {
171   DBUG_PRINT("info", ("mi_read with mmap %d\n", info->dfile));
172   if (info->s->concurrent_insert) mysql_rwlock_rdlock(&info->s->mmap_lock);
173 
174   /*
175     The following test may fail in the following cases:
176     - We failed to remap a memory area (fragmented memory?)
177     - This thread has done some writes, but not yet extended the
178     memory mapped area.
179   */
180 
181   if (info->s->mmaped_length >= offset + Count) {
182     memcpy(Buffer, info->s->file_map + offset, Count);
183     if (info->s->concurrent_insert) mysql_rwlock_unlock(&info->s->mmap_lock);
184     return 0;
185   } else {
186     if (info->s->concurrent_insert) mysql_rwlock_unlock(&info->s->mmap_lock);
187     return mysql_file_pread(info->dfile, Buffer, Count, offset, MyFlags);
188   }
189 }
190 
191 /* wrapper for mysql_file_pread in case if mmap isn't used */
192 
mi_nommap_pread(MI_INFO * info,uchar * Buffer,size_t Count,my_off_t offset,myf MyFlags)193 size_t mi_nommap_pread(MI_INFO *info, uchar *Buffer, size_t Count,
194                        my_off_t offset, myf MyFlags) {
195   return mysql_file_pread(info->dfile, Buffer, Count, offset, MyFlags);
196 }
197 
198 /*
199   Write bytes to MySAM handler, using mmap or pwrite
200 
201   SYNOPSIS
202     mi_mmap_pwrite()
203     info		MyISAM handler
204     Buffer              Output buffer
205     Count               Count of bytes for write
206     offset              Start position
207     MyFlags
208 
209   RETURN
210     0  ok
211     !=0  error.  In this case return error from pwrite
212 */
213 
mi_mmap_pwrite(MI_INFO * info,const uchar * Buffer,size_t Count,my_off_t offset,myf MyFlags)214 size_t mi_mmap_pwrite(MI_INFO *info, const uchar *Buffer, size_t Count,
215                       my_off_t offset, myf MyFlags) {
216   DBUG_PRINT("info", ("mi_write with mmap %d\n", info->dfile));
217   if (info->s->concurrent_insert) mysql_rwlock_rdlock(&info->s->mmap_lock);
218 
219   /*
220     The following test may fail in the following cases:
221     - We failed to remap a memory area (fragmented memory?)
222     - This thread has done some writes, but not yet extended the
223     memory mapped area.
224   */
225 
226   if (info->s->mmaped_length >= offset + Count) {
227     memcpy(info->s->file_map + offset, Buffer, Count);
228     if (info->s->concurrent_insert) mysql_rwlock_unlock(&info->s->mmap_lock);
229     return 0;
230   } else {
231     info->s->nonmmaped_inserts++;
232     if (info->s->concurrent_insert) mysql_rwlock_unlock(&info->s->mmap_lock);
233     return mysql_file_pwrite(info->dfile, Buffer, Count, offset, MyFlags);
234   }
235 }
236 
237 /* wrapper for mysql_file_pwrite in case if mmap isn't used */
238 
mi_nommap_pwrite(MI_INFO * info,const uchar * Buffer,size_t Count,my_off_t offset,myf MyFlags)239 size_t mi_nommap_pwrite(MI_INFO *info, const uchar *Buffer, size_t Count,
240                         my_off_t offset, myf MyFlags) {
241   return mysql_file_pwrite(info->dfile, Buffer, Count, offset, MyFlags);
242 }
243 
_mi_write_dynamic_record(MI_INFO * info,const uchar * record)244 int _mi_write_dynamic_record(MI_INFO *info, const uchar *record) {
245   ulong reclength = _mi_rec_pack(info, info->rec_buff, record);
246   return (write_dynamic_record(info, info->rec_buff, reclength));
247 }
248 
_mi_update_dynamic_record(MI_INFO * info,my_off_t pos,const uchar * record)249 int _mi_update_dynamic_record(MI_INFO *info, my_off_t pos,
250                               const uchar *record) {
251   uint length = _mi_rec_pack(info, info->rec_buff, record);
252   return (update_dynamic_record(info, pos, info->rec_buff, length));
253 }
254 
_mi_write_blob_record(MI_INFO * info,const uchar * record)255 int _mi_write_blob_record(MI_INFO *info, const uchar *record) {
256   uchar *rec_buff;
257   int error;
258   ulong reclength, reclength2, extra;
259 
260   extra = (ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER) + MI_SPLIT_LENGTH +
261            MI_DYN_DELETE_BLOCK_HEADER + 1);
262   reclength = (info->s->base.pack_reclength +
263                _my_calc_total_blob_length(info, record) + extra);
264   if (!(rec_buff = (uchar *)my_malloc(mi_key_memory_record_buffer, reclength,
265                                       MYF(0)))) {
266     set_my_errno(HA_ERR_OUT_OF_MEM); /* purecov: inspected */
267     return (-1);
268   }
269   reclength2 = _mi_rec_pack(
270       info, rec_buff + ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER), record);
271   DBUG_PRINT("info",
272              ("reclength: %lu  reclength2: %lu", reclength, reclength2));
273   DBUG_ASSERT(reclength2 <= reclength);
274   error = write_dynamic_record(
275       info, rec_buff + ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER), reclength2);
276   my_free(rec_buff);
277   return (error);
278 }
279 
_mi_update_blob_record(MI_INFO * info,my_off_t pos,const uchar * record)280 int _mi_update_blob_record(MI_INFO *info, my_off_t pos, const uchar *record) {
281   uchar *rec_buff;
282   int error;
283   ulong reclength, extra;
284 
285   extra = (ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER) + MI_SPLIT_LENGTH +
286            MI_DYN_DELETE_BLOCK_HEADER);
287   reclength = (info->s->base.pack_reclength +
288                _my_calc_total_blob_length(info, record) + extra);
289   if (!(rec_buff = (uchar *)my_malloc(mi_key_memory_record_buffer, reclength,
290                                       MYF(0)))) {
291     set_my_errno(HA_ERR_OUT_OF_MEM); /* purecov: inspected */
292     return (-1);
293   }
294   reclength = _mi_rec_pack(info, rec_buff + ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER),
295                            record);
296   error = update_dynamic_record(
297       info, pos, rec_buff + ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER), reclength);
298   my_free(rec_buff);
299   return (error);
300 }
301 
_mi_delete_dynamic_record(MI_INFO * info)302 int _mi_delete_dynamic_record(MI_INFO *info) {
303   return delete_dynamic_record(info, info->lastpos, 0);
304 }
305 
306 /* Write record to data-file */
307 
write_dynamic_record(MI_INFO * info,uchar * record,ulong reclength)308 static int write_dynamic_record(MI_INFO *info, uchar *record, ulong reclength) {
309   int flag;
310   ulong length;
311   my_off_t filepos;
312   DBUG_TRACE;
313 
314   flag = 0;
315 
316   /*
317     Check if we have enough room for the new record.
318     First we do simplified check to make usual case faster.
319     Then we do more precise check for the space left.
320     Though it still is not absolutely precise, as
321     we always use MI_MAX_DYN_BLOCK_HEADER while it can be
322     less in the most of the cases.
323   */
324 
325   if (unlikely(info->s->base.max_data_file_length -
326                    info->state->data_file_length <
327                reclength + MI_MAX_DYN_BLOCK_HEADER)) {
328     if (info->s->base.max_data_file_length - info->state->data_file_length +
329             info->state->empty - info->state->del * MI_MAX_DYN_BLOCK_HEADER <
330         reclength + MI_MAX_DYN_BLOCK_HEADER) {
331       set_my_errno(HA_ERR_RECORD_FILE_FULL);
332       return 1;
333     }
334   }
335 
336   do {
337     if (_mi_find_writepos(info, reclength, &filepos, &length)) goto err;
338     if (_mi_write_part_record(
339             info, filepos, length,
340             (info->append_insert_at_end ? HA_OFFSET_ERROR
341                                         : info->s->state.dellink),
342             &record, &reclength, &flag))
343       goto err;
344   } while (reclength);
345 
346   return 0;
347 err:
348   return 1;
349 }
350 
351 /* Get a block for data ; The given data-area must be used !! */
352 
_mi_find_writepos(MI_INFO * info,ulong reclength,my_off_t * filepos,ulong * length)353 static int _mi_find_writepos(MI_INFO *info, ulong reclength, /* record length */
354                              my_off_t *filepos, /* Return file pos */
355                              ulong *length)     /* length of block at filepos */
356 {
357   MI_BLOCK_INFO block_info;
358   ulong tmp;
359   DBUG_TRACE;
360 
361   if (info->s->state.dellink != HA_OFFSET_ERROR &&
362       !info->append_insert_at_end) {
363     /* Deleted blocks exists;  Get last used block */
364     *filepos = info->s->state.dellink;
365     block_info.second_read = 0;
366     info->rec_cache.seek_not_done = true;
367     if (!(_mi_get_block_info(&block_info, info->dfile, info->s->state.dellink) &
368           BLOCK_DELETED)) {
369       DBUG_PRINT("error", ("Delete link crashed"));
370       set_my_errno(HA_ERR_WRONG_IN_RECORD);
371       return -1;
372     }
373     info->s->state.dellink = block_info.next_filepos;
374     info->state->del--;
375     info->state->empty -= block_info.block_len;
376     *length = block_info.block_len;
377   } else {
378     /* No deleted blocks;  Allocate a new block */
379     *filepos = info->state->data_file_length;
380     if ((tmp = reclength + 3 + (reclength >= (65520 - 3))) <
381         info->s->base.min_block_length)
382       tmp = info->s->base.min_block_length;
383     else
384       tmp = ((tmp + MI_DYN_ALIGN_SIZE - 1) & (~(ulong)(MI_DYN_ALIGN_SIZE - 1)));
385     if (info->state->data_file_length >
386         (info->s->base.max_data_file_length - tmp)) {
387       set_my_errno(HA_ERR_RECORD_FILE_FULL);
388       return -1;
389     }
390     if (tmp > MI_MAX_BLOCK_LENGTH) tmp = MI_MAX_BLOCK_LENGTH;
391     *length = tmp;
392     info->state->data_file_length += tmp;
393     info->s->state.split++;
394     info->update |= HA_STATE_WRITE_AT_END;
395   }
396   return 0;
397 } /* _mi_find_writepos */
398 
399 /*
400   Unlink a deleted block from the deleted list.
401   This block will be combined with the preceding or next block to form
402   a big block.
403 */
404 
unlink_deleted_block(MI_INFO * info,MI_BLOCK_INFO * block_info)405 static bool unlink_deleted_block(MI_INFO *info, MI_BLOCK_INFO *block_info) {
406   DBUG_TRACE;
407   if (block_info->filepos == info->s->state.dellink) {
408     /* First deleted block;  We can just use this ! */
409     info->s->state.dellink = block_info->next_filepos;
410   } else {
411     MI_BLOCK_INFO tmp;
412     tmp.second_read = 0;
413     /* Unlink block from the previous block */
414     if (!(_mi_get_block_info(&tmp, info->dfile, block_info->prev_filepos) &
415           BLOCK_DELETED))
416       return true; /* Something is wrong */
417     mi_sizestore(tmp.header + 4, block_info->next_filepos);
418     if (info->s->file_write(info, tmp.header + 4, 8,
419                             block_info->prev_filepos + 4, MYF(MY_NABP)))
420       return true;
421     /* Unlink block from next block */
422     if (block_info->next_filepos != HA_OFFSET_ERROR) {
423       if (!(_mi_get_block_info(&tmp, info->dfile, block_info->next_filepos) &
424             BLOCK_DELETED))
425         return true; /* Something is wrong */
426       mi_sizestore(tmp.header + 12, block_info->prev_filepos);
427       if (info->s->file_write(info, tmp.header + 12, 8,
428                               block_info->next_filepos + 12, MYF(MY_NABP)))
429         return true;
430     }
431   }
432   /* We now have one less deleted block */
433   info->state->del--;
434   info->state->empty -= block_info->block_len;
435   info->s->state.split--;
436 
437   /*
438     If this was a block that we where accessing through table scan
439     (mi_rrnd() or mi_scan(), then ensure that we skip over this block
440     when doing next mi_rrnd() or mi_scan().
441   */
442   if (info->nextpos == block_info->filepos)
443     info->nextpos += block_info->block_len;
444   return false;
445 }
446 
447 /*
448   Add a backward link to delete block
449 
450   SYNOPSIS
451     update_backward_delete_link()
452     info		MyISAM handler
453     delete_block	Position to delete block to update.
454                         If this is 'HA_OFFSET_ERROR', nothing will be done
455     filepos		Position to block that 'delete_block' should point to
456 
457   RETURN
458     0  ok
459     1  error.  In this case my_error is set.
460 */
461 
update_backward_delete_link(MI_INFO * info,my_off_t delete_block,my_off_t filepos)462 static int update_backward_delete_link(MI_INFO *info, my_off_t delete_block,
463                                        my_off_t filepos) {
464   MI_BLOCK_INFO block_info;
465   DBUG_TRACE;
466 
467   if (delete_block != HA_OFFSET_ERROR) {
468     block_info.second_read = 0;
469     if (_mi_get_block_info(&block_info, info->dfile, delete_block) &
470         BLOCK_DELETED) {
471       uchar buff[8];
472       mi_sizestore(buff, filepos);
473       if (info->s->file_write(info, buff, 8, delete_block + 12, MYF(MY_NABP)))
474         return 1; /* Error on write */
475     } else {
476       set_my_errno(HA_ERR_WRONG_IN_RECORD);
477       return 1; /* Wrong delete link */
478     }
479   }
480   return 0;
481 }
482 
/*
  Delete datarecord from database

  SYNOPSIS
    delete_dynamic_record()
    info		MyISAM handler
    filepos		Position of the first block of the record
    second_read		Initial value for block_info.second_read, which is
                        passed to _mi_get_block_info()

  NOTES
    info->rec_cache.seek_not_done is updated in cmp_record

    Every block of the record is rewritten as a deleted block and put at
    the front of the delete list (info->s->state.dellink).  A deleted
    block that directly follows a block of the record is merged into it,
    as long as the combined length stays below MI_DYN_MAX_BLOCK_LENGTH.

    A failure to update a delete link does not stop the processing; it
    is remembered and reported through the return value.

  RETURN
    0  ok
    1  error.  my_errno is set to HA_ERR_WRONG_IN_RECORD if a block of
       the record could not be used.
*/

static int delete_dynamic_record(MI_INFO *info, my_off_t filepos,
                                 uint second_read) {
  uint length, b_type;
  MI_BLOCK_INFO block_info, del_block;
  int error;
  bool remove_next_block;
  DBUG_TRACE;

  /* First add a link from the last block to the new one */
  error = update_backward_delete_link(info, info->s->state.dellink, filepos);

  block_info.second_read = second_read;
  do {
    /*
      Remove block at 'filepos'.
      'length' becomes the distance from the block start to
      block_info.filepos plus block_info.block_len; the block is refused
      if it is already deleted, unreadable or shorter than
      MI_MIN_BLOCK_LENGTH.
    */
    if ((b_type = _mi_get_block_info(&block_info, info->dfile, filepos)) &
            (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
             BLOCK_FATAL_ERROR) ||
        (length = (uint)(block_info.filepos - filepos) + block_info.block_len) <
            MI_MIN_BLOCK_LENGTH) {
      set_my_errno(HA_ERR_WRONG_IN_RECORD);
      return 1;
    }
    /* Check if next block is a delete block */
    del_block.second_read = 0;
    remove_next_block = false;
    if (_mi_get_block_info(&del_block, info->dfile, filepos + length) &
            BLOCK_DELETED &&
        del_block.block_len + length < MI_DYN_MAX_BLOCK_LENGTH) {
      /* We can't remove this yet as this block may be the head block */
      remove_next_block = true;
      length += del_block.block_len;
    }

    /*
      Build the 20 byte header of a deleted block:
        byte  0       0
        bytes 1..3    length of the deleted block
        bytes 4..11   current head of the delete list
        bytes 12..19  all 0xFF for the last block of the record, otherwise
                      the position of the record's next block, which is
                      handled in the next iteration and then becomes the
                      head of the delete list
    */
    block_info.header[0] = 0;
    mi_int3store(block_info.header + 1, length);
    mi_sizestore(block_info.header + 4, info->s->state.dellink);
    if (b_type & BLOCK_LAST)
      memset(block_info.header + 12, 255, 8);
    else
      mi_sizestore(block_info.header + 12, block_info.next_filepos);
    if (info->s->file_write(info, (uchar *)block_info.header, 20, filepos,
                            MYF(MY_NABP)))
      return 1;
    /* The block just written is the new head of the delete list */
    info->s->state.dellink = filepos;
    info->state->del++;
    info->state->empty += length;
    /* Continue with the next block of the record */
    filepos = block_info.next_filepos;

    /* Now it's safe to unlink the deleted block directly after this one */
    if (remove_next_block && unlink_deleted_block(info, &del_block)) error = 1;
  } while (!(b_type & BLOCK_LAST));

  return error;
}
540 
/*
  Write a block to datafile

  SYNOPSIS
    _mi_write_part_record()
    info		MyISAM handler
    filepos		Position of the (empty) block to fill
    length		Length of the block at filepos
    next_filepos	Next empty block; if HA_OFFSET_ERROR and the record
                        doesn't fit, the head of the delete list or the
                        end of the data file is used instead
    record		In/out: pointer to the not yet written part of the
                        record
    reclength		In/out: length of the not yet written part
    flag		In/out: 0 for the first block of a record; set to 6
                        after a successful write

  NOTES
    The block header is built in 'temp' and then copied to the bytes
    directly in front of *record, so that header and data can be written
    with one write.  The caller must therefore provide writable space in
    front of *record.

    Bytes after the written part of the record that get overwritten
    (zero fill and a possible delete block header) are saved in 'temp'
    and restored after the write.

    If the block is more than MI_SPLIT_LENGTH bigger than what is left
    of the record, the end of the block is split off and linked in as a
    new deleted block.

  RETURN
    0  ok
    1  error
*/

int _mi_write_part_record(MI_INFO *info,
                          my_off_t filepos,      /* points at empty block */
                          ulong length,          /* length of block */
                          my_off_t next_filepos, /* Next empty block */
                          uchar **record,        /* pointer to record ptr */
                          ulong *reclength,      /* length of *record */
                          int *flag)             /* *flag == 0 if header */
{
  ulong head_length, res_length, extra_length, long_block, del_length;
  uchar *pos, *record_end;
  my_off_t next_delete_block;
  uchar temp[MI_SPLIT_LENGTH + MI_DYN_DELETE_BLOCK_HEADER];
  DBUG_TRACE;

  next_delete_block = HA_OFFSET_ERROR;

  res_length = extra_length = 0;
  if (length > *reclength + MI_SPLIT_LENGTH) { /* Split big block */
    /* res_length is the part of the block that is given back as deleted */
    res_length = MY_ALIGN(length - *reclength - MI_EXTEND_BLOCK_LENGTH,
                          MI_DYN_ALIGN_SIZE);
    length -= res_length; /* Use this for first part */
  }
  /* Lengths of 65520 or more need the header variants with 3 byte lengths */
  long_block = (length < 65520L && *reclength < 65520L) ? 0 : 1;
  if (length == *reclength + 3 + long_block) {
    /* Block is exactly of the right length */
    temp[0] = (uchar)(1 + *flag) + (uchar)long_block; /* Flag is 0 or 6 */
    if (long_block) {
      mi_int3store(temp + 1, *reclength);
      head_length = 4;
    } else {
      mi_int2store(temp + 1, *reclength);
      head_length = 3;
    }
  } else if (length - long_block < *reclength + 4) { /* Too short block */
    /* The record continues in another block; find out where */
    if (next_filepos == HA_OFFSET_ERROR)
      next_filepos = (info->s->state.dellink != HA_OFFSET_ERROR &&
                              !info->append_insert_at_end
                          ? info->s->state.dellink
                          : info->state->data_file_length);
    if (*flag == 0) /* First block */
    {
      if (*reclength > MI_MAX_BLOCK_LENGTH) {
        /* Header with 4 byte record length */
        head_length = 16;
        temp[0] = 13;
        mi_int4store(temp + 1, *reclength);
        mi_int3store(temp + 5, length - head_length);
        mi_sizestore((uchar *)temp + 8, next_filepos);
      } else {
        /* Header: record length, data length in this block, next block */
        head_length = 5 + 8 + long_block * 2;
        temp[0] = 5 + (uchar)long_block;
        if (long_block) {
          mi_int3store(temp + 1, *reclength);
          mi_int3store(temp + 4, length - head_length);
          mi_sizestore((uchar *)temp + 7, next_filepos);
        } else {
          mi_int2store(temp + 1, *reclength);
          mi_int2store(temp + 3, length - head_length);
          mi_sizestore((uchar *)temp + 5, next_filepos);
        }
      }
    } else {
      /* Continuation block: data length in this block, next block */
      head_length = 3 + 8 + long_block;
      temp[0] = 11 + (uchar)long_block;
      if (long_block) {
        mi_int3store(temp + 1, length - head_length);
        mi_sizestore((uchar *)temp + 4, next_filepos);
      } else {
        mi_int2store(temp + 1, length - head_length);
        mi_sizestore((uchar *)temp + 3, next_filepos);
      }
    }
  } else { /* Block with empty info last */
    head_length = 4 + long_block;
    /* Number of unused bytes at the end of the block; stored in one byte */
    extra_length = length - *reclength - head_length;
    temp[0] = (uchar)(3 + *flag) + (uchar)long_block; /* 3,4 or 9,10 */
    if (long_block) {
      mi_int3store(temp + 1, *reclength);
      temp[4] = (uchar)(extra_length);
    } else {
      mi_int2store(temp + 1, *reclength);
      temp[3] = (uchar)(extra_length);
    }
    length = *reclength + head_length; /* Write only what is needed */
  }
  DBUG_DUMP("header", (uchar *)temp, head_length);

  /* Make a long block for one write */
  record_end = *record + length - head_length;
  del_length = (res_length ? MI_DYN_DELETE_BLOCK_HEADER : 0);
  /* Put the header directly in front of the record data */
  memmove((uchar *)(*record - head_length), (uchar *)temp, head_length);
  /* Save the bytes after the data that are overwritten below */
  memcpy(temp, record_end, (size_t)(extra_length + del_length));
  memset(record_end, 0, extra_length);

  if (res_length) {
    /* Check first if we can join this block with the next one */
    MI_BLOCK_INFO del_block;
    my_off_t next_block = filepos + length + extra_length + res_length;

    del_block.second_read = 0;
    if (next_block < info->state->data_file_length &&
        info->s->state.dellink != HA_OFFSET_ERROR) {
      if ((_mi_get_block_info(&del_block, info->dfile, next_block) &
           BLOCK_DELETED) &&
          res_length + del_block.block_len < MI_DYN_MAX_BLOCK_LENGTH) {
        if (unlink_deleted_block(info, &del_block)) goto err;
        res_length += del_block.block_len;
      }
    }

    /* Create a delete link of the last part of the block */
    pos = record_end + extra_length;
    pos[0] = '\0';
    mi_int3store(pos + 1, res_length);
    mi_sizestore(pos + 4, info->s->state.dellink);
    memset(pos + 12, 255, 8); /* End link */
    /* Remember the old list head; its backward link is updated below */
    next_delete_block = info->s->state.dellink;
    info->s->state.dellink = filepos + length + extra_length;
    info->state->del++;
    info->state->empty += res_length;
    info->s->state.split++;
  }
  /* Write header, data, zero fill and delete header in one go */
  if (info->opt_flag & WRITE_CACHE_USED &&
      info->update & HA_STATE_WRITE_AT_END) {
    if (info->update & HA_STATE_EXTEND_BLOCK) {
      info->update &= ~HA_STATE_EXTEND_BLOCK;
      if (my_block_write(&info->rec_cache, (uchar *)*record - head_length,
                         length + extra_length + del_length, filepos))
        goto err;
    } else if (my_b_write(&info->rec_cache, (uchar *)*record - head_length,
                          length + extra_length + del_length))
      goto err;
  } else {
    info->rec_cache.seek_not_done = true;
    if (info->s->file_write(info, (uchar *)*record - head_length,
                            length + extra_length + del_length, filepos,
                            info->s->write_flag))
      goto err;
  }
  /* Restore the saved bytes and step over the part that was written */
  memcpy(record_end, temp, (size_t)(extra_length + del_length));
  *record = record_end;
  *reclength -= (length - head_length);
  *flag = 6;

  if (del_length) {
    /* link the next delete block to this */
    if (update_backward_delete_link(info, next_delete_block,
                                    info->s->state.dellink))
      goto err;
  }

  return 0;
err:
  DBUG_PRINT("exit", ("errno: %d", my_errno()));
  return 1;
} /*_mi_write_part_record */
698 
699 /* update record from datafile */
700 
update_dynamic_record(MI_INFO * info,my_off_t filepos,uchar * record,ulong reclength)701 static int update_dynamic_record(MI_INFO *info, my_off_t filepos, uchar *record,
702                                  ulong reclength) {
703   int flag;
704   uint error;
705   ulong length;
706   MI_BLOCK_INFO block_info;
707   DBUG_TRACE;
708 
709   flag = block_info.second_read = 0;
710   /*
711      Check if we have enough room for the record.
712      First we do simplified check to make usual case faster.
713      Then we do more precise check for the space left.
714      Though it still is not absolutely precise, as
715      we always use MI_MAX_DYN_BLOCK_HEADER while it can be
716      less in the most of the cases.
717   */
718 
719   /*
720     compare with just the reclength as we're going
721     to get some space from the old replaced record
722   */
723   if (unlikely(info->s->base.max_data_file_length -
724                    info->state->data_file_length <
725                reclength)) {
726     /*
727        let's read the old record's block to find out the length of the
728        old record
729     */
730     if ((error = _mi_get_block_info(&block_info, info->dfile, filepos)) &
731         (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | BLOCK_FATAL_ERROR)) {
732       DBUG_PRINT("error", ("Got wrong block info"));
733       if (!(error & BLOCK_FATAL_ERROR)) set_my_errno(HA_ERR_WRONG_IN_RECORD);
734       goto err;
735     }
736 
737     /*
738       if new record isn't longer, we can go on safely
739     */
740     if (block_info.rec_len < reclength) {
741       if (info->s->base.max_data_file_length - info->state->data_file_length +
742               info->state->empty - info->state->del * MI_MAX_DYN_BLOCK_HEADER <
743           reclength - block_info.rec_len + MI_MAX_DYN_BLOCK_HEADER) {
744         set_my_errno(HA_ERR_RECORD_FILE_FULL);
745         goto err;
746       }
747     }
748     block_info.second_read = 0;
749   }
750 
751   while (reclength > 0) {
752     if (filepos != info->s->state.dellink) {
753       block_info.next_filepos = HA_OFFSET_ERROR;
754       if ((error = _mi_get_block_info(&block_info, info->dfile, filepos)) &
755           (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
756            BLOCK_FATAL_ERROR)) {
757         DBUG_PRINT("error", ("Got wrong block info"));
758         if (!(error & BLOCK_FATAL_ERROR)) set_my_errno(HA_ERR_WRONG_IN_RECORD);
759         goto err;
760       }
761       length = (ulong)(block_info.filepos - filepos) + block_info.block_len;
762       if (length < reclength) {
763         uint tmp = MY_ALIGN(reclength - length + 3 + (reclength >= 65520L),
764                             MI_DYN_ALIGN_SIZE);
765         /* Don't create a block bigger than MI_MAX_BLOCK_LENGTH */
766         tmp = std::min(length + tmp, MI_MAX_BLOCK_LENGTH) - length;
767         /* Check if we can extend this block */
768         if (block_info.filepos + block_info.block_len ==
769                 info->state->data_file_length &&
770             info->state->data_file_length <
771                 info->s->base.max_data_file_length - tmp) {
772           /* extend file */
773           DBUG_PRINT("info", ("Extending file with %d bytes", tmp));
774           if (info->nextpos == info->state->data_file_length)
775             info->nextpos += tmp;
776           info->state->data_file_length += tmp;
777           info->update |= HA_STATE_WRITE_AT_END | HA_STATE_EXTEND_BLOCK;
778           length += tmp;
779         } else if (length < MI_MAX_BLOCK_LENGTH - MI_MIN_BLOCK_LENGTH) {
780           /*
781             Check if next block is a deleted block
782             Above we have MI_MIN_BLOCK_LENGTH to avoid the problem where
783             the next block is so small it can't be splited which could
784             casue problems
785           */
786 
787           MI_BLOCK_INFO del_block;
788           del_block.second_read = 0;
789           if (_mi_get_block_info(&del_block, info->dfile,
790                                  block_info.filepos + block_info.block_len) &
791               BLOCK_DELETED) {
792             /* Use; Unlink it and extend the current block */
793             DBUG_PRINT("info", ("Extending current block"));
794             if (unlink_deleted_block(info, &del_block)) goto err;
795             if ((length += del_block.block_len) > MI_MAX_BLOCK_LENGTH) {
796               /*
797                 New block was too big, link overflow part back to
798                 delete list
799               */
800               my_off_t next_pos;
801               ulong rest_length = length - MI_MAX_BLOCK_LENGTH;
802               rest_length = std::max(rest_length, ulong(MI_MIN_BLOCK_LENGTH));
803               next_pos = del_block.filepos + del_block.block_len - rest_length;
804 
805               if (update_backward_delete_link(info, info->s->state.dellink,
806                                               next_pos))
807                 return 1;
808 
809               /* create delete link for data that didn't fit into the page */
810               del_block.header[0] = 0;
811               mi_int3store(del_block.header + 1, rest_length);
812               mi_sizestore(del_block.header + 4, info->s->state.dellink);
813               memset(del_block.header + 12, 255, 8);
814               if (info->s->file_write(info, (uchar *)del_block.header, 20,
815                                       next_pos, MYF(MY_NABP)))
816                 return 1;
817               info->s->state.dellink = next_pos;
818               info->s->state.split++;
819               info->state->del++;
820               info->state->empty += rest_length;
821               length -= rest_length;
822             }
823           }
824         }
825       }
826     } else {
827       if (_mi_find_writepos(info, reclength, &filepos, &length)) goto err;
828     }
829     if (_mi_write_part_record(info, filepos, length, block_info.next_filepos,
830                               &record, &reclength, &flag))
831       goto err;
832     if ((filepos = block_info.next_filepos) == HA_OFFSET_ERROR) {
833       /* Start writing data on deleted blocks */
834       filepos = info->s->state.dellink;
835     }
836   }
837 
838   if (block_info.next_filepos != HA_OFFSET_ERROR) {
839     /*
840       delete_dynamic_record() may change data file position.
841       IO cache must be notified as it may still have cached
842       data, which has to be flushed later.
843     */
844     info->rec_cache.seek_not_done = true;
845     if (delete_dynamic_record(info, block_info.next_filepos, 1)) goto err;
846   }
847   return 0;
848 err:
849   return 1;
850 }
851 
852 /* Pack a record. Return new reclength */
853 
_mi_rec_pack(MI_INFO * info,uchar * to,const uchar * from)854 uint _mi_rec_pack(MI_INFO *info, uchar *to, const uchar *from) {
855   uint length, new_length, flag, bit, i;
856   const uchar *pos, *end, *startpos;
857   uchar *packpos;
858   enum en_fieldtype type;
859   MI_COLUMNDEF *rec;
860   MI_BLOB *blob;
861   DBUG_TRACE;
862 
863   flag = 0;
864   bit = 1;
865   startpos = packpos = to;
866   to += info->s->base.pack_bits;
867   blob = info->blobs;
868   rec = info->s->rec;
869 
870   for (i = info->s->base.fields; i-- > 0; from += length, rec++) {
871     length = (uint)rec->length;
872     if ((type = (enum en_fieldtype)rec->type) != FIELD_NORMAL) {
873       if (type == FIELD_BLOB) {
874         if (!blob->length)
875           flag |= bit;
876         else {
877           char *temp_pos;
878           size_t tmp_length = length - portable_sizeof_char_ptr;
879           memcpy((uchar *)to, from, tmp_length);
880           memcpy(&temp_pos, from + tmp_length, sizeof(char *));
881           memcpy(to + tmp_length, temp_pos, (size_t)blob->length);
882           to += tmp_length + blob->length;
883         }
884         blob++;
885       } else if (type == FIELD_SKIP_ZERO) {
886         if (memcmp(from, zero_string, length) == 0)
887           flag |= bit;
888         else {
889           memcpy((uchar *)to, from, (size_t)length);
890           to += length;
891         }
892       } else if (type == FIELD_SKIP_ENDSPACE || type == FIELD_SKIP_PRESPACE) {
893         pos = from;
894         end = from + length;
895         if (type == FIELD_SKIP_ENDSPACE) { /* Pack trailing spaces */
896           while (end > from && *(end - 1) == ' ') end--;
897         } else { /* Pack pref-spaces */
898           while (pos < end && *pos == ' ') pos++;
899         }
900         new_length = (uint)(end - pos);
901         if (new_length + 1 + (rec->length > 255 && new_length > 127) < length) {
902           if (rec->length > 255 && new_length > 127) {
903             to[0] = (uchar)((new_length & 127) + 128);
904             to[1] = (uchar)(new_length >> 7);
905             to += 2;
906           } else
907             *to++ = (uchar)new_length;
908           memcpy((uchar *)to, pos, (size_t)new_length);
909           to += new_length;
910           flag |= bit;
911         } else {
912           memcpy(to, from, (size_t)length);
913           to += length;
914         }
915       } else if (type == FIELD_VARCHAR) {
916         uint pack_length = HA_VARCHAR_PACKLENGTH(rec->length - 1);
917         uint tmp_length;
918         if (pack_length == 1) {
919           tmp_length = (uint)*from;
920           *to++ = *from;
921         } else {
922           tmp_length = uint2korr(from);
923           store_key_length_inc(to, tmp_length);
924         }
925         memcpy(to, from + pack_length, tmp_length);
926         to += tmp_length;
927         continue;
928       } else {
929         memcpy(to, from, (size_t)length);
930         to += length;
931         continue; /* Normal field */
932       }
933       if ((bit = bit << 1) >= 256) {
934         *packpos++ = (uchar)flag;
935         bit = 1;
936         flag = 0;
937       }
938     } else {
939       memcpy(to, from, (size_t)length);
940       to += length;
941     }
942   }
943   if (bit != 1) *packpos = (uchar)flag;
944   if (info->s->calc_checksum) *to++ = (uchar)info->checksum;
945   DBUG_PRINT("exit", ("packed length: %d", (int)(to - startpos)));
946   return (uint)(to - startpos);
947 } /* _mi_rec_pack */
948 
949 /*
950   Check if a record was correctly packed. Used only by myisamchk
951   Returns 0 if record is ok.
952 */
953 
_mi_rec_check(MI_INFO * info,const uchar * record,uchar * rec_buff,ulong packed_length,bool with_checksum)954 bool _mi_rec_check(MI_INFO *info, const uchar *record, uchar *rec_buff,
955                    ulong packed_length, bool with_checksum) {
956   uint length, new_length, flag, bit, i;
957   const uchar *pos, *end, *packpos, *to;
958   enum en_fieldtype type;
959   MI_COLUMNDEF *rec;
960   DBUG_TRACE;
961 
962   packpos = rec_buff;
963   to = rec_buff + info->s->base.pack_bits;
964   rec = info->s->rec;
965   flag = *packpos;
966   bit = 1;
967 
968   for (i = info->s->base.fields; i-- > 0; record += length, rec++) {
969     length = (uint)rec->length;
970     if ((type = (enum en_fieldtype)rec->type) != FIELD_NORMAL) {
971       if (type == FIELD_BLOB) {
972         uint blob_length =
973             _mi_calc_blob_length(length - portable_sizeof_char_ptr, record);
974         if (!blob_length && !(flag & bit)) goto err;
975         if (blob_length) to += length - portable_sizeof_char_ptr + blob_length;
976       } else if (type == FIELD_SKIP_ZERO) {
977         if (memcmp(record, zero_string, length) == 0) {
978           if (!(flag & bit)) goto err;
979         } else
980           to += length;
981       } else if (type == FIELD_SKIP_ENDSPACE || type == FIELD_SKIP_PRESPACE) {
982         pos = record;
983         end = record + length;
984         if (type == FIELD_SKIP_ENDSPACE) { /* Pack trailing spaces */
985           while (end > record && *(end - 1) == ' ') end--;
986         } else { /* Pack pre-spaces */
987           while (pos < end && *pos == ' ') pos++;
988         }
989         new_length = (uint)(end - pos);
990         if (new_length + 1 + (rec->length > 255 && new_length > 127) < length) {
991           if (!(flag & bit)) goto err;
992           if (rec->length > 255 && new_length > 127) {
993             /* purecov: begin inspected */
994             if (to[0] != (uchar)((new_length & 127) + 128) ||
995                 to[1] != (uchar)(new_length >> 7))
996               goto err;
997             to += 2;
998             /* purecov: end */
999           } else if (*to++ != (uchar)new_length)
1000             goto err;
1001           to += new_length;
1002         } else
1003           to += length;
1004       } else if (type == FIELD_VARCHAR) {
1005         uint pack_length = HA_VARCHAR_PACKLENGTH(rec->length - 1);
1006         uint tmp_length;
1007         if (pack_length == 1) {
1008           tmp_length = (uint)*record;
1009           to += 1 + tmp_length;
1010           continue;
1011         } else {
1012           tmp_length = uint2korr(record);
1013           to += get_pack_length(tmp_length) + tmp_length;
1014         }
1015         continue;
1016       } else {
1017         to += length;
1018         continue; /* Normal field */
1019       }
1020       if ((bit = bit << 1) >= 256) {
1021         flag = *++packpos;
1022         bit = 1;
1023       }
1024     } else
1025       to += length;
1026   }
1027   if (packed_length !=
1028           (uint)(to - rec_buff) + (info->s->calc_checksum != nullptr) ||
1029       (bit != 1 && (flag & ~(bit - 1))))
1030     goto err;
1031   if (with_checksum && ((uchar)info->checksum != (uchar)*to)) {
1032     DBUG_PRINT("error", ("wrong checksum for row"));
1033     goto err;
1034   }
1035   return false;
1036 
1037 err:
1038   return true;
1039 }
1040 
1041 /* Unpacks a record */
/* Returns MY_FILE_ERROR and my_errno = HA_ERR_WRONG_IN_RECORD if the */
/* packed record is inconsistent. Returns found_length (>0) if ok */
1044 
_mi_rec_unpack(MI_INFO * info,uchar * to,const uchar * from,ulong found_length)1045 ulong _mi_rec_unpack(MI_INFO *info, uchar *to, const uchar *from,
1046                      ulong found_length) {
1047   uint flag, bit, length, rec_length, min_pack_length;
1048   enum en_fieldtype type;
1049   uchar *to_end;
1050   MI_COLUMNDEF *rec, *end_field;
1051   DBUG_TRACE;
1052 
1053   to_end = to + info->s->base.reclength;
1054   const uchar *from_end = from + found_length;
1055   flag = (uchar)*from;
1056   bit = 1;
1057   const uchar *packpos = from;
1058   if (found_length < info->s->base.min_pack_length) goto err;
1059   from += info->s->base.pack_bits;
1060   min_pack_length = info->s->base.min_pack_length - info->s->base.pack_bits;
1061 
1062   for (rec = info->s->rec, end_field = rec + info->s->base.fields;
1063        rec < end_field; to += rec_length, rec++) {
1064     rec_length = rec->length;
1065     if ((type = (enum en_fieldtype)rec->type) != FIELD_NORMAL &&
1066         (type != FIELD_CHECK)) {
1067       if (type == FIELD_VARCHAR) {
1068         uint pack_length = HA_VARCHAR_PACKLENGTH(rec_length - 1);
1069         if (pack_length == 1) {
1070           length = (uint)*from;
1071           if (length > rec_length - 1) goto err;
1072           *to = *from++;
1073         } else {
1074           length = get_key_length(&from);
1075           if (length > rec_length - 2) goto err;
1076           int2store(to, length);
1077         }
1078         if (from + length > from_end) goto err;
1079         memcpy(to + pack_length, from, length);
1080         from += length;
1081         min_pack_length--;
1082         continue;
1083       }
1084       if (flag & bit) {
1085         if (type == FIELD_BLOB || type == FIELD_SKIP_ZERO)
1086           memset(to, 0, rec_length);
1087         else if (type == FIELD_SKIP_ENDSPACE || type == FIELD_SKIP_PRESPACE) {
1088           if (rec->length > 255 && *from & 128) {
1089             if (from + 1 >= from_end) goto err;
1090             length = (*from & 127) + ((uint)(uchar) * (from + 1) << 7);
1091             from += 2;
1092           } else {
1093             if (from == from_end) goto err;
1094             length = (uchar)*from++;
1095           }
1096           min_pack_length--;
1097           if (length >= rec_length ||
1098               min_pack_length + length > (uint)(from_end - from))
1099             goto err;
1100           if (type == FIELD_SKIP_ENDSPACE) {
1101             memcpy(to, from, (size_t)length);
1102             memset(to + length, ' ', rec_length - length);
1103           } else {
1104             memset(to, ' ', rec_length - length);
1105             memcpy(to + rec_length - length, from, (size_t)length);
1106           }
1107           from += length;
1108         }
1109       } else if (type == FIELD_BLOB) {
1110         uint size_length = rec_length - portable_sizeof_char_ptr;
1111         ulong blob_length = _mi_calc_blob_length(size_length, from);
1112         ulong from_left = (ulong)(from_end - from);
1113         if (from_left < size_length || from_left - size_length < blob_length ||
1114             from_left - size_length - blob_length < min_pack_length)
1115           goto err;
1116         memcpy(to, from, (size_t)size_length);
1117         from += size_length;
1118         memcpy(to + size_length, &from, sizeof(char *));
1119         from += blob_length;
1120       } else {
1121         if (type == FIELD_SKIP_ENDSPACE || type == FIELD_SKIP_PRESPACE)
1122           min_pack_length--;
1123         if (min_pack_length + rec_length > (uint)(from_end - from)) goto err;
1124         memcpy(to, from, (size_t)rec_length);
1125         from += rec_length;
1126       }
1127       if ((bit = bit << 1) >= 256) {
1128         flag = (uchar) * ++packpos;
1129         bit = 1;
1130       }
1131     } else {
1132       if (min_pack_length > (uint)(from_end - from)) goto err;
1133       min_pack_length -= rec_length;
1134       memcpy(to, from, (size_t)rec_length);
1135       from += rec_length;
1136     }
1137   }
1138   if (info->s->calc_checksum) from++;
1139   if (to == to_end && from == from_end && (bit == 1 || !(flag & ~(bit - 1))))
1140     return found_length;
1141 
1142 err:
1143   set_my_errno(HA_ERR_WRONG_IN_RECORD);
1144   DBUG_PRINT("error", ("to_end: %p -> %p  from_end: %p -> %p", to, to_end, from,
1145                        from_end));
1146   DBUG_DUMP("from", (uchar *)info->rec_buff, info->s->base.min_pack_length);
1147   return MY_FILE_ERROR;
1148 } /* _mi_rec_unpack */
1149 
1150 /* Calc length of blob. Update info in blobs->length */
1151 
_my_calc_total_blob_length(MI_INFO * info,const uchar * record)1152 ulong _my_calc_total_blob_length(MI_INFO *info, const uchar *record) {
1153   ulong length;
1154   MI_BLOB *blob, *end;
1155 
1156   for (length = 0, blob = info->blobs, end = blob + info->s->base.blobs;
1157        blob != end; blob++) {
1158     blob->length =
1159         _mi_calc_blob_length(blob->pack_length, record + blob->offset);
1160     length += blob->length;
1161   }
1162   return length;
1163 }
1164 
_mi_calc_blob_length(uint length,const uchar * pos)1165 ulong _mi_calc_blob_length(uint length, const uchar *pos) {
1166   switch (length) {
1167     case 1:
1168       return (uint)(uchar)*pos;
1169     case 2:
1170       return (uint)uint2korr(pos);
1171     case 3:
1172       return uint3korr(pos);
1173     case 4:
1174       return uint4korr(pos);
1175     default:
1176       break;
1177   }
1178   return 0; /* Impossible */
1179 }
1180 
_mi_store_blob_length(uchar * pos,uint pack_length,uint length)1181 void _mi_store_blob_length(uchar *pos, uint pack_length, uint length) {
1182   switch (pack_length) {
1183     case 1:
1184       *pos = (uchar)length;
1185       break;
1186     case 2:
1187       int2store(pos, length);
1188       break;
1189     case 3:
1190       int3store(pos, length);
1191       break;
1192     case 4:
1193       int4store(pos, length);
1194     default:
1195       break;
1196   }
1197   return;
1198 }
1199 
1200 /*
1201   Read record from datafile.
1202 
1203   SYNOPSIS
1204     _mi_read_dynamic_record()
1205       info                      MI_INFO pointer to table.
1206       filepos                   From where to read the record.
1207       buf                       Destination for record.
1208 
1209   NOTE
1210 
1211     If a write buffer is active, it needs to be flushed if its contents
1212     intersects with the record to read. We always check if the position
1213     of the first byte of the write buffer is lower than the position
1214     past the last byte to read. In theory this is also true if the write
1215     buffer is completely below the read segment. That is, if there is no
1216     intersection. But this case is unusual. We flush anyway. Only if the
1217     first byte in the write buffer is above the last byte to read, we do
1218     not flush.
1219 
1220     A dynamic record may need several reads. So this check must be done
1221     before every read. Reading a dynamic record starts with reading the
1222     block header. If the record does not fit into the free space of the
1223     header, the block may be longer than the header. In this case a
1224     second read is necessary. These one or two reads repeat for every
1225     part of the record.
1226 
1227   RETURN
1228     0           OK
1229     -1          Error
1230 */
1231 
_mi_read_dynamic_record(MI_INFO * info,my_off_t filepos,uchar * buf)1232 int _mi_read_dynamic_record(MI_INFO *info, my_off_t filepos, uchar *buf) {
1233   int block_of_record;
1234   uint b_type, left_length = 0;
1235   uchar *to = nullptr;
1236   MI_BLOCK_INFO block_info;
1237   File file;
1238   DBUG_TRACE;
1239 
1240   if (filepos != HA_OFFSET_ERROR) {
1241     file = info->dfile;
1242     block_of_record = 0; /* First block of record is numbered as zero. */
1243     block_info.second_read = 0;
1244     do {
1245       /* A corrupted table can have wrong pointers. (Bug# 19835) */
1246       if (filepos == HA_OFFSET_ERROR) goto panic;
1247       if (info->opt_flag & WRITE_CACHE_USED &&
1248           info->rec_cache.pos_in_file < filepos + MI_BLOCK_INFO_HEADER_LENGTH &&
1249           flush_io_cache(&info->rec_cache))
1250         goto err;
1251       info->rec_cache.seek_not_done = true;
1252       if ((b_type = _mi_get_block_info(&block_info, file, filepos)) &
1253           (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
1254            BLOCK_FATAL_ERROR)) {
1255         if (b_type & (BLOCK_SYNC_ERROR | BLOCK_DELETED))
1256           set_my_errno(HA_ERR_RECORD_DELETED);
1257         goto err;
1258       }
1259       if (block_of_record++ == 0) /* First block */
1260       {
1261         if (block_info.rec_len > (uint)info->s->base.max_pack_length)
1262           goto panic;
1263         if (info->s->base.blobs) {
1264           if (!(to = mi_alloc_rec_buff(info, block_info.rec_len,
1265                                        &info->rec_buff)))
1266             goto err;
1267         } else
1268           to = info->rec_buff;
1269         left_length = block_info.rec_len;
1270       }
1271       if (left_length < block_info.data_len || !block_info.data_len)
1272         goto panic; /* Wrong linked record */
1273       /* copy information that is already read */
1274       {
1275         uint offset = (uint)(block_info.filepos - filepos);
1276         uint prefetch_len = (sizeof(block_info.header) - offset);
1277         filepos += sizeof(block_info.header);
1278 
1279         if (prefetch_len > block_info.data_len)
1280           prefetch_len = block_info.data_len;
1281         if (prefetch_len) {
1282           memcpy((uchar *)to, block_info.header + offset, prefetch_len);
1283           block_info.data_len -= prefetch_len;
1284           left_length -= prefetch_len;
1285           to += prefetch_len;
1286         }
1287       }
1288       /* read rest of record from file */
1289       if (block_info.data_len) {
1290         if (info->opt_flag & WRITE_CACHE_USED &&
1291             info->rec_cache.pos_in_file < filepos + block_info.data_len &&
1292             flush_io_cache(&info->rec_cache))
1293           goto err;
1294         /*
1295           What a pity that this method is not called 'file_pread' and that
1296           there is no equivalent without seeking. We are at the right
1297           position already. :(
1298         */
1299         if (info->s->file_read(info, (uchar *)to, block_info.data_len, filepos,
1300                                MYF(MY_NABP)))
1301           goto panic;
1302         left_length -= block_info.data_len;
1303         to += block_info.data_len;
1304       }
1305       filepos = block_info.next_filepos;
1306     } while (left_length);
1307 
1308     info->update |= HA_STATE_AKTIV; /* We have a aktive record */
1309     fast_mi_writeinfo(info);
1310     return _mi_rec_unpack(info, buf, info->rec_buff, block_info.rec_len) !=
1311                    MY_FILE_ERROR
1312                ? 0
1313                : -1;
1314   }
1315   fast_mi_writeinfo(info);
1316   return -1; /* Wrong data to read */
1317 
1318 panic:
1319   set_my_errno(HA_ERR_WRONG_IN_RECORD);
1320 err:
1321   (void)_mi_writeinfo(info, 0);
1322   return -1;
1323 }
1324 
1325 /* compare unique constraint between stored rows */
1326 
_mi_cmp_dynamic_unique(MI_INFO * info,MI_UNIQUEDEF * def,const uchar * record,my_off_t pos)1327 int _mi_cmp_dynamic_unique(MI_INFO *info, MI_UNIQUEDEF *def,
1328                            const uchar *record, my_off_t pos) {
1329   uchar *rec_buff, *old_record;
1330   int error;
1331   DBUG_TRACE;
1332 
1333   if (!(old_record = (uchar *)my_malloc(mi_key_memory_record_buffer,
1334                                         info->s->base.reclength, MYF(0))))
1335     return 1;
1336 
1337   /* Don't let the compare destroy blobs that may be in use */
1338   rec_buff = info->rec_buff;
1339   if (info->s->base.blobs) info->rec_buff = nullptr;
1340   error = _mi_read_dynamic_record(info, pos, old_record);
1341   if (!error)
1342     error = mi_unique_comp(def, record, old_record, def->null_are_equal);
1343   if (info->s->base.blobs) {
1344     my_free(mi_get_rec_buff_ptr(info, info->rec_buff));
1345     info->rec_buff = rec_buff;
1346   }
1347   my_free(old_record);
1348   return error;
1349 }
1350 
/* Compare of record on disk with packed record in memory */
1352 
_mi_cmp_dynamic_record(MI_INFO * info,const uchar * record)1353 int _mi_cmp_dynamic_record(MI_INFO *info, const uchar *record) {
1354   uint flag, reclength, b_type;
1355   my_off_t filepos;
1356   uchar *buffer;
1357   MI_BLOCK_INFO block_info;
1358   DBUG_TRACE;
1359 
1360   if (info->opt_flag & WRITE_CACHE_USED) {
1361     info->update &= ~(HA_STATE_WRITE_AT_END | HA_STATE_EXTEND_BLOCK);
1362     if (flush_io_cache(&info->rec_cache)) return -1;
1363   }
1364   info->rec_cache.seek_not_done = true;
1365 
1366   /* If nobody have touched the database we don't have to test rec */
1367 
1368   buffer = info->rec_buff;
1369   if ((info->opt_flag & READ_CHECK_USED)) { /* If check isn't disabled  */
1370     if (info->s->base.blobs) {
1371       if (!(buffer =
1372                 (uchar *)my_malloc(mi_key_memory_record_buffer,
1373                                    info->s->base.pack_reclength +
1374                                        _my_calc_total_blob_length(info, record),
1375                                    MYF(0))))
1376         return -1;
1377     }
1378     reclength = _mi_rec_pack(info, buffer, record);
1379     record = buffer;
1380 
1381     filepos = info->lastpos;
1382     flag = block_info.second_read = 0;
1383     block_info.next_filepos = filepos;
1384     while (reclength > 0) {
1385       if ((b_type = _mi_get_block_info(&block_info, info->dfile,
1386                                        block_info.next_filepos)) &
1387           (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
1388            BLOCK_FATAL_ERROR)) {
1389         if (b_type & (BLOCK_SYNC_ERROR | BLOCK_DELETED))
1390           set_my_errno(HA_ERR_RECORD_CHANGED);
1391         goto err;
1392       }
1393       if (flag == 0) /* First block */
1394       {
1395         flag = 1;
1396         if (reclength != block_info.rec_len) {
1397           set_my_errno(HA_ERR_RECORD_CHANGED);
1398           goto err;
1399         }
1400       } else if (reclength < block_info.data_len) {
1401         set_my_errno(HA_ERR_WRONG_IN_RECORD);
1402         goto err;
1403       }
1404       reclength -= block_info.data_len;
1405       if (_mi_cmp_buffer(info->dfile, record, block_info.filepos,
1406                          block_info.data_len)) {
1407         set_my_errno(HA_ERR_RECORD_CHANGED);
1408         goto err;
1409       }
1410       flag = 1;
1411       record += block_info.data_len;
1412     }
1413   }
1414   set_my_errno(0);
1415 err:
1416   if (buffer != info->rec_buff) my_free((uchar *)buffer);
1417   return my_errno();
1418 }
1419 
/* Compare file to buffer */
1421 
_mi_cmp_buffer(File file,const uchar * buff,my_off_t filepos,uint length)1422 static int _mi_cmp_buffer(File file, const uchar *buff, my_off_t filepos,
1423                           uint length) {
1424   uint next_length;
1425   uchar temp_buff[IO_SIZE * 2];
1426   DBUG_TRACE;
1427 
1428   next_length = IO_SIZE * 2 - (uint)(filepos & (IO_SIZE - 1));
1429 
1430   while (length > IO_SIZE * 2) {
1431     if (mysql_file_pread(file, temp_buff, next_length, filepos, MYF(MY_NABP)) ||
1432         memcmp(buff, temp_buff, next_length))
1433       goto err;
1434     filepos += next_length;
1435     buff += next_length;
1436     length -= next_length;
1437     next_length = IO_SIZE * 2;
1438   }
1439   if (mysql_file_pread(file, temp_buff, length, filepos, MYF(MY_NABP)))
1440     goto err;
1441   return memcmp(buff, temp_buff, length);
1442 err:
1443   return 1;
1444 }
1445 
1446 /*
1447   Read record from datafile.
1448 
1449   SYNOPSIS
1450     _mi_read_rnd_dynamic_record()
1451       info                      MI_INFO pointer to table.
1452       buf                       Destination for record.
1453       filepos                   From where to read the record.
1454       skip_deleted_blocks       If to repeat reading until a non-deleted
1455                                 record is found.
1456 
1457   NOTE
1458 
1459     If a write buffer is active, it needs to be flushed if its contents
1460     intersects with the record to read. We always check if the position
1461     of the first byte of the write buffer is lower than the position
1462     past the last byte to read. In theory this is also true if the write
1463     buffer is completely below the read segment. That is, if there is no
1464     intersection. But this case is unusual. We flush anyway. Only if the
1465     first byte in the write buffer is above the last byte to read, we do
1466     not flush.
1467 
1468     A dynamic record may need several reads. So this check must be done
1469     before every read. Reading a dynamic record starts with reading the
1470     block header. If the record does not fit into the free space of the
1471     header, the block may be longer than the header. In this case a
1472     second read is necessary. These one or two reads repeat for every
1473     part of the record.
1474 
1475   RETURN
1476     0           OK
1477     != 0        Error
1478 */
1479 
_mi_read_rnd_dynamic_record(MI_INFO * info,uchar * buf,my_off_t filepos,bool skip_deleted_blocks)1480 int _mi_read_rnd_dynamic_record(MI_INFO *info, uchar *buf, my_off_t filepos,
1481                                 bool skip_deleted_blocks) {
1482   int block_of_record, info_read, save_errno;
1483   uint left_len, b_type;
1484   uchar *to = nullptr;
1485   MI_BLOCK_INFO block_info;
1486   MYISAM_SHARE *share = info->s;
1487   DBUG_TRACE;
1488 
1489   info_read = 0;
1490 
1491   DBUG_EXECUTE_IF("catch_file_offset_deviation", {
1492     if (filepos) return HA_ERR_RECORD_DELETED;
1493   });
1494 
1495   if (info->lock_type == F_UNLCK) {
1496     if (share->tot_locks == 0) {
1497       if (my_lock(share->kfile, F_RDLCK,
1498                   MYF(MY_SEEK_NOT_DONE) | info->lock_wait))
1499         return my_errno();
1500     }
1501   } else
1502     info_read = 1; /* memory-keyinfoblock is ok */
1503 
1504   block_of_record = 0; /* First block of record is numbered as zero. */
1505   block_info.second_read = 0;
1506   left_len = 1;
1507   do {
1508     if (filepos >= info->state->data_file_length) {
1509       if (!info_read) { /* Check if changed */
1510         info_read = 1;
1511         info->rec_cache.seek_not_done = true;
1512         if (mi_state_info_read_dsk(share->kfile, &share->state, true))
1513           goto panic;
1514       }
1515       if (filepos >= info->state->data_file_length) {
1516         set_my_errno(HA_ERR_END_OF_FILE);
1517         goto err;
1518       }
1519     }
1520     if (info->opt_flag & READ_CACHE_USED) {
1521       if (_mi_read_cache(
1522               &info->rec_cache, (uchar *)block_info.header, filepos,
1523               sizeof(block_info.header),
1524               (!block_of_record && skip_deleted_blocks ? READING_NEXT : 0) |
1525                   READING_HEADER))
1526         goto panic;
1527       b_type = _mi_get_block_info(&block_info, -1, filepos);
1528     } else {
1529       if (info->opt_flag & WRITE_CACHE_USED &&
1530           info->rec_cache.pos_in_file < filepos + MI_BLOCK_INFO_HEADER_LENGTH &&
1531           flush_io_cache(&info->rec_cache))
1532         return my_errno();
1533       info->rec_cache.seek_not_done = true;
1534       b_type = _mi_get_block_info(&block_info, info->dfile, filepos);
1535     }
1536 
1537     if (b_type &
1538         (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | BLOCK_FATAL_ERROR)) {
1539       if ((b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR)) &&
1540           skip_deleted_blocks) {
1541         filepos = block_info.filepos + block_info.block_len;
1542         block_info.second_read = 0;
1543         continue; /* Search after next_record */
1544       }
1545       if (b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR)) {
1546         /*
1547           If we're not on the first block of a record and
1548           the block is marked as deleted or out of sync,
1549           something's gone wrong: the record is damaged.
1550         */
1551         if (block_of_record != 0) goto panic;
1552 
1553         set_my_errno(HA_ERR_RECORD_DELETED);
1554         info->lastpos = block_info.filepos;
1555         info->nextpos = block_info.filepos + block_info.block_len;
1556       }
1557       goto err;
1558     }
1559     if (block_of_record == 0) /* First block */
1560     {
1561       if (block_info.rec_len > (uint)share->base.max_pack_length) goto panic;
1562       info->lastpos = filepos;
1563       if (share->base.blobs) {
1564         if (!(to =
1565                   mi_alloc_rec_buff(info, block_info.rec_len, &info->rec_buff)))
1566           goto err;
1567       } else
1568         to = info->rec_buff;
1569       left_len = block_info.rec_len;
1570     }
1571     if (left_len < block_info.data_len) goto panic; /* Wrong linked record */
1572 
1573     /* copy information that is already read */
1574     {
1575       uint offset = (uint)(block_info.filepos - filepos);
1576       uint tmp_length = (sizeof(block_info.header) - offset);
1577       filepos = block_info.filepos;
1578 
1579       if (tmp_length > block_info.data_len) tmp_length = block_info.data_len;
1580       if (tmp_length) {
1581         memcpy((uchar *)to, block_info.header + offset, tmp_length);
1582         block_info.data_len -= tmp_length;
1583         left_len -= tmp_length;
1584         to += tmp_length;
1585         filepos += tmp_length;
1586       }
1587     }
1588     /* read rest of record from file */
1589     if (block_info.data_len) {
1590       if (info->opt_flag & READ_CACHE_USED) {
1591         if (_mi_read_cache(
1592                 &info->rec_cache, (uchar *)to, filepos, block_info.data_len,
1593                 (!block_of_record && skip_deleted_blocks) ? READING_NEXT : 0))
1594           goto panic;
1595       } else {
1596         if (info->opt_flag & WRITE_CACHE_USED &&
1597             info->rec_cache.pos_in_file <
1598                 block_info.filepos + block_info.data_len &&
1599             flush_io_cache(&info->rec_cache))
1600           goto err;
1601         /* mysql_file_seek(info->dfile, filepos, MY_SEEK_SET, MYF(0)); */
1602         if (mysql_file_read(info->dfile, (uchar *)to, block_info.data_len,
1603                             MYF(MY_NABP))) {
1604           if (my_errno() == -1)
1605             set_my_errno(HA_ERR_WRONG_IN_RECORD); /* Unexpected end of file */
1606           goto err;
1607         }
1608       }
1609     }
1610     /*
1611       Increment block-of-record counter. If it was the first block,
1612       remember the position behind the block for the next call.
1613     */
1614     if (block_of_record++ == 0) {
1615       info->nextpos = block_info.filepos + block_info.block_len;
1616       skip_deleted_blocks = false;
1617     }
1618     left_len -= block_info.data_len;
1619     to += block_info.data_len;
1620     filepos = block_info.next_filepos;
1621   } while (left_len);
1622 
1623   info->update |= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED;
1624   fast_mi_writeinfo(info);
1625   if (_mi_rec_unpack(info, buf, info->rec_buff, block_info.rec_len) !=
1626       MY_FILE_ERROR)
1627     return 0;
1628   return my_errno(); /* Wrong record */
1629 
1630 panic:
1631   set_my_errno(HA_ERR_WRONG_IN_RECORD); /* Something is fatal wrong */
1632 err:
1633   save_errno = my_errno();
1634   (void)_mi_writeinfo(info, 0);
1635   set_my_errno(save_errno);
1636   return save_errno;
1637 }
1638 
1639 /* Read and process header from a dynamic-record-file */
1640 
_mi_get_block_info(MI_BLOCK_INFO * info,File file,my_off_t filepos)1641 uint _mi_get_block_info(MI_BLOCK_INFO *info, File file, my_off_t filepos) {
1642   uint return_val = 0;
1643   uchar *header = info->header;
1644 
1645   if (file >= 0) {
1646     /*
1647       We do not use mysql_file_pread() here because we want to have the file
1648       pointer set to the end of the header after this function.
1649       mysql_file_pread() may leave the file pointer untouched.
1650     */
1651     mysql_file_seek(file, filepos, MY_SEEK_SET, MYF(0));
1652     if (mysql_file_read(file, header, sizeof(info->header), MYF(0)) !=
1653         sizeof(info->header))
1654       goto err;
1655   }
1656   DBUG_DUMP("header", header, MI_BLOCK_INFO_HEADER_LENGTH);
1657   if (info->second_read) {
1658     if (info->header[0] <= 6 || info->header[0] == 13)
1659       return_val = BLOCK_SYNC_ERROR;
1660   } else {
1661     if (info->header[0] > 6 && info->header[0] != 13)
1662       return_val = BLOCK_SYNC_ERROR;
1663   }
1664   info->next_filepos = HA_OFFSET_ERROR; /* Dummy if no next block */
1665 
1666   switch (info->header[0]) {
1667     case 0:
1668       if ((info->block_len = (uint)mi_uint3korr(header + 1)) <
1669               MI_MIN_BLOCK_LENGTH ||
1670           (info->block_len & (MI_DYN_ALIGN_SIZE - 1)))
1671         goto err;
1672       info->filepos = filepos;
1673       info->next_filepos = mi_sizekorr(header + 4);
1674       info->prev_filepos = mi_sizekorr(header + 12);
1675       return return_val | BLOCK_DELETED; /* Deleted block */
1676 
1677     case 1:
1678       info->rec_len = info->data_len = info->block_len =
1679           mi_uint2korr(header + 1);
1680       info->filepos = filepos + 3;
1681       return return_val | BLOCK_FIRST | BLOCK_LAST;
1682     case 2:
1683       info->rec_len = info->data_len = info->block_len =
1684           mi_uint3korr(header + 1);
1685       info->filepos = filepos + 4;
1686       return return_val | BLOCK_FIRST | BLOCK_LAST;
1687 
1688     case 13:
1689       info->rec_len = mi_uint4korr(header + 1);
1690       info->block_len = info->data_len = mi_uint3korr(header + 5);
1691       info->next_filepos = mi_sizekorr(header + 8);
1692       info->second_read = 1;
1693       info->filepos = filepos + 16;
1694       return return_val | BLOCK_FIRST;
1695 
1696     case 3:
1697       info->rec_len = info->data_len = mi_uint2korr(header + 1);
1698       info->block_len = info->rec_len + (uint)header[3];
1699       info->filepos = filepos + 4;
1700       return return_val | BLOCK_FIRST | BLOCK_LAST;
1701     case 4:
1702       info->rec_len = info->data_len = mi_uint3korr(header + 1);
1703       info->block_len = info->rec_len + (uint)header[4];
1704       info->filepos = filepos + 5;
1705       return return_val | BLOCK_FIRST | BLOCK_LAST;
1706 
1707     case 5:
1708       info->rec_len = mi_uint2korr(header + 1);
1709       info->block_len = info->data_len = mi_uint2korr(header + 3);
1710       info->next_filepos = mi_sizekorr(header + 5);
1711       info->second_read = 1;
1712       info->filepos = filepos + 13;
1713       return return_val | BLOCK_FIRST;
1714     case 6:
1715       info->rec_len = mi_uint3korr(header + 1);
1716       info->block_len = info->data_len = mi_uint3korr(header + 4);
1717       info->next_filepos = mi_sizekorr(header + 7);
1718       info->second_read = 1;
1719       info->filepos = filepos + 15;
1720       return return_val | BLOCK_FIRST;
1721 
1722       /* The following blocks are identical to 1-6 without rec_len */
1723     case 7:
1724       info->data_len = info->block_len = mi_uint2korr(header + 1);
1725       info->filepos = filepos + 3;
1726       return return_val | BLOCK_LAST;
1727     case 8:
1728       info->data_len = info->block_len = mi_uint3korr(header + 1);
1729       info->filepos = filepos + 4;
1730       return return_val | BLOCK_LAST;
1731 
1732     case 9:
1733       info->data_len = mi_uint2korr(header + 1);
1734       info->block_len = info->data_len + (uint)header[3];
1735       info->filepos = filepos + 4;
1736       return return_val | BLOCK_LAST;
1737     case 10:
1738       info->data_len = mi_uint3korr(header + 1);
1739       info->block_len = info->data_len + (uint)header[4];
1740       info->filepos = filepos + 5;
1741       return return_val | BLOCK_LAST;
1742 
1743     case 11:
1744       info->data_len = info->block_len = mi_uint2korr(header + 1);
1745       info->next_filepos = mi_sizekorr(header + 3);
1746       info->second_read = 1;
1747       info->filepos = filepos + 11;
1748       return return_val;
1749     case 12:
1750       info->data_len = info->block_len = mi_uint3korr(header + 1);
1751       info->next_filepos = mi_sizekorr(header + 4);
1752       info->second_read = 1;
1753       info->filepos = filepos + 12;
1754       return return_val;
1755   }
1756 
1757 err:
1758   set_my_errno(HA_ERR_WRONG_IN_RECORD); /* Garbage */
1759   return BLOCK_ERROR;
1760 }
1761