1 /* Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23 /*
24 Functions to handle space-packed-records and blobs
25
26 A row may be stored in one or more linked blocks.
27 The block size is between MI_MIN_BLOCK_LENGTH and MI_MAX_BLOCK_LENGTH.
28 Each block is aligned on MI_DYN_ALIGN_SIZE.
  The reason for the max block size is to not have too many different types
  of blocks. For the different block types, look at _mi_get_block_info()
31 */
32
33 #include "my_config.h"
34
35 #include <fcntl.h>
36 #include <sys/types.h>
37
38 #include <algorithm>
39
40 #include "my_byteorder.h"
41 #include "my_compiler.h"
42 #include "my_dbug.h"
43 #include "my_inttypes.h"
44 #include "my_io.h"
45 #include "my_macros.h"
46 #include "my_pointer_arithmetic.h"
47 #include "sql/field.h"
48 #include "storage/myisam/myisam_sys.h"
49 #include "storage/myisam/myisamdef.h"
50
/*
  All-zero reference buffer of 16 bytes.
  Enough for comparing if number is zero.
*/
static char zero_string[16] = {0};
53
54 static int write_dynamic_record(MI_INFO *info, uchar *record, ulong reclength);
55 static int _mi_find_writepos(MI_INFO *info, ulong reclength, my_off_t *filepos,
56 ulong *length);
57 static int update_dynamic_record(MI_INFO *info, my_off_t filepos, uchar *record,
58 ulong reclength);
59 static int delete_dynamic_record(MI_INFO *info, my_off_t filepos,
60 uint second_read);
61 static int _mi_cmp_buffer(File file, const uchar *buff, my_off_t filepos,
62 uint length);
63
64 /* Interface function from MI_INFO */
65
66 /*
67 Create mmaped area for MyISAM handler
68
69 SYNOPSIS
70 mi_dynmap_file()
71 info MyISAM handler
72
73 RETURN
74 0 ok
75 1 error.
76 */
77
mi_dynmap_file(MI_INFO * info,my_off_t size)78 bool mi_dynmap_file(MI_INFO *info, my_off_t size) {
79 DBUG_TRACE;
80 if (size == 0 || size > (my_off_t)(~((size_t)0))) {
81 if (size)
82 DBUG_PRINT("warning", ("File is too large for mmap"));
83 else
84 DBUG_PRINT("warning", ("Do not mmap zero-length"));
85 return true;
86 }
87 /*
88 I wonder if it is good to use MAP_NORESERVE. From the Linux man page:
89 MAP_NORESERVE
90 Do not reserve swap space for this mapping. When swap space is
91 reserved, one has the guarantee that it is possible to modify the
92 mapping. When swap space is not reserved one might get SIGSEGV
93 upon a write if no physical memory is available.
94 */
95 info->s->file_map = (uchar *)my_mmap(
96 nullptr, (size_t)size,
97 info->s->mode == O_RDONLY ? PROT_READ : PROT_READ | PROT_WRITE,
98 MAP_SHARED | MAP_NORESERVE, info->dfile, 0L);
99 if (info->s->file_map == (uchar *)MAP_FAILED) {
100 info->s->file_map = nullptr;
101 return true;
102 }
103 #if defined(HAVE_MADVISE)
104 madvise((char *)info->s->file_map, size, MADV_RANDOM);
105 #endif
106 info->s->mmaped_length = size;
107 info->s->file_read = mi_mmap_pread;
108 info->s->file_write = mi_mmap_pwrite;
109 return false;
110 }
111
112 /*
113 Destroy mmaped area for MyISAM handler
114
115 SYNOPSIS
116 mi_munmap_file()
117 info MyISAM handler
118
119 RETURN
120 0 ok
121 !0 error.
122 */
123
mi_munmap_file(MI_INFO * info)124 int mi_munmap_file(MI_INFO *info) {
125 int ret;
126 DBUG_TRACE;
127 if ((ret = my_munmap((void *)info->s->file_map,
128 (size_t)info->s->mmaped_length)))
129 return ret;
130 info->s->file_read = mi_nommap_pread;
131 info->s->file_write = mi_nommap_pwrite;
132 info->s->file_map = nullptr;
133 info->s->mmaped_length = 0;
134 return 0;
135 }
136
137 /*
138 Resize mmaped area for MyISAM handler
139
140 SYNOPSIS
141 mi_remap_file()
142 info MyISAM handler
143
144 RETURN
145 */
146
mi_remap_file(MI_INFO * info,my_off_t size)147 void mi_remap_file(MI_INFO *info, my_off_t size) {
148 if (info->s->file_map) {
149 mi_munmap_file(info);
150 mi_dynmap_file(info, size);
151 }
152 }
153
154 /*
155 Read bytes from MySAM handler, using mmap or pread
156
157 SYNOPSIS
158 mi_mmap_pread()
159 info MyISAM handler
160 Buffer Input buffer
161 Count Count of bytes for read
162 offset Start position
163 MyFlags
164
165 RETURN
166 0 ok
167 */
168
mi_mmap_pread(MI_INFO * info,uchar * Buffer,size_t Count,my_off_t offset,myf MyFlags)169 size_t mi_mmap_pread(MI_INFO *info, uchar *Buffer, size_t Count,
170 my_off_t offset, myf MyFlags) {
171 DBUG_PRINT("info", ("mi_read with mmap %d\n", info->dfile));
172 if (info->s->concurrent_insert) mysql_rwlock_rdlock(&info->s->mmap_lock);
173
174 /*
175 The following test may fail in the following cases:
176 - We failed to remap a memory area (fragmented memory?)
177 - This thread has done some writes, but not yet extended the
178 memory mapped area.
179 */
180
181 if (info->s->mmaped_length >= offset + Count) {
182 memcpy(Buffer, info->s->file_map + offset, Count);
183 if (info->s->concurrent_insert) mysql_rwlock_unlock(&info->s->mmap_lock);
184 return 0;
185 } else {
186 if (info->s->concurrent_insert) mysql_rwlock_unlock(&info->s->mmap_lock);
187 return mysql_file_pread(info->dfile, Buffer, Count, offset, MyFlags);
188 }
189 }
190
191 /* wrapper for mysql_file_pread in case if mmap isn't used */
192
mi_nommap_pread(MI_INFO * info,uchar * Buffer,size_t Count,my_off_t offset,myf MyFlags)193 size_t mi_nommap_pread(MI_INFO *info, uchar *Buffer, size_t Count,
194 my_off_t offset, myf MyFlags) {
195 return mysql_file_pread(info->dfile, Buffer, Count, offset, MyFlags);
196 }
197
198 /*
199 Write bytes to MySAM handler, using mmap or pwrite
200
201 SYNOPSIS
202 mi_mmap_pwrite()
203 info MyISAM handler
204 Buffer Output buffer
205 Count Count of bytes for write
206 offset Start position
207 MyFlags
208
209 RETURN
210 0 ok
211 !=0 error. In this case return error from pwrite
212 */
213
mi_mmap_pwrite(MI_INFO * info,const uchar * Buffer,size_t Count,my_off_t offset,myf MyFlags)214 size_t mi_mmap_pwrite(MI_INFO *info, const uchar *Buffer, size_t Count,
215 my_off_t offset, myf MyFlags) {
216 DBUG_PRINT("info", ("mi_write with mmap %d\n", info->dfile));
217 if (info->s->concurrent_insert) mysql_rwlock_rdlock(&info->s->mmap_lock);
218
219 /*
220 The following test may fail in the following cases:
221 - We failed to remap a memory area (fragmented memory?)
222 - This thread has done some writes, but not yet extended the
223 memory mapped area.
224 */
225
226 if (info->s->mmaped_length >= offset + Count) {
227 memcpy(info->s->file_map + offset, Buffer, Count);
228 if (info->s->concurrent_insert) mysql_rwlock_unlock(&info->s->mmap_lock);
229 return 0;
230 } else {
231 info->s->nonmmaped_inserts++;
232 if (info->s->concurrent_insert) mysql_rwlock_unlock(&info->s->mmap_lock);
233 return mysql_file_pwrite(info->dfile, Buffer, Count, offset, MyFlags);
234 }
235 }
236
237 /* wrapper for mysql_file_pwrite in case if mmap isn't used */
238
mi_nommap_pwrite(MI_INFO * info,const uchar * Buffer,size_t Count,my_off_t offset,myf MyFlags)239 size_t mi_nommap_pwrite(MI_INFO *info, const uchar *Buffer, size_t Count,
240 my_off_t offset, myf MyFlags) {
241 return mysql_file_pwrite(info->dfile, Buffer, Count, offset, MyFlags);
242 }
243
_mi_write_dynamic_record(MI_INFO * info,const uchar * record)244 int _mi_write_dynamic_record(MI_INFO *info, const uchar *record) {
245 ulong reclength = _mi_rec_pack(info, info->rec_buff, record);
246 return (write_dynamic_record(info, info->rec_buff, reclength));
247 }
248
_mi_update_dynamic_record(MI_INFO * info,my_off_t pos,const uchar * record)249 int _mi_update_dynamic_record(MI_INFO *info, my_off_t pos,
250 const uchar *record) {
251 uint length = _mi_rec_pack(info, info->rec_buff, record);
252 return (update_dynamic_record(info, pos, info->rec_buff, length));
253 }
254
_mi_write_blob_record(MI_INFO * info,const uchar * record)255 int _mi_write_blob_record(MI_INFO *info, const uchar *record) {
256 uchar *rec_buff;
257 int error;
258 ulong reclength, reclength2, extra;
259
260 extra = (ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER) + MI_SPLIT_LENGTH +
261 MI_DYN_DELETE_BLOCK_HEADER + 1);
262 reclength = (info->s->base.pack_reclength +
263 _my_calc_total_blob_length(info, record) + extra);
264 if (!(rec_buff = (uchar *)my_malloc(mi_key_memory_record_buffer, reclength,
265 MYF(0)))) {
266 set_my_errno(HA_ERR_OUT_OF_MEM); /* purecov: inspected */
267 return (-1);
268 }
269 reclength2 = _mi_rec_pack(
270 info, rec_buff + ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER), record);
271 DBUG_PRINT("info",
272 ("reclength: %lu reclength2: %lu", reclength, reclength2));
273 DBUG_ASSERT(reclength2 <= reclength);
274 error = write_dynamic_record(
275 info, rec_buff + ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER), reclength2);
276 my_free(rec_buff);
277 return (error);
278 }
279
_mi_update_blob_record(MI_INFO * info,my_off_t pos,const uchar * record)280 int _mi_update_blob_record(MI_INFO *info, my_off_t pos, const uchar *record) {
281 uchar *rec_buff;
282 int error;
283 ulong reclength, extra;
284
285 extra = (ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER) + MI_SPLIT_LENGTH +
286 MI_DYN_DELETE_BLOCK_HEADER);
287 reclength = (info->s->base.pack_reclength +
288 _my_calc_total_blob_length(info, record) + extra);
289 if (!(rec_buff = (uchar *)my_malloc(mi_key_memory_record_buffer, reclength,
290 MYF(0)))) {
291 set_my_errno(HA_ERR_OUT_OF_MEM); /* purecov: inspected */
292 return (-1);
293 }
294 reclength = _mi_rec_pack(info, rec_buff + ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER),
295 record);
296 error = update_dynamic_record(
297 info, pos, rec_buff + ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER), reclength);
298 my_free(rec_buff);
299 return (error);
300 }
301
_mi_delete_dynamic_record(MI_INFO * info)302 int _mi_delete_dynamic_record(MI_INFO *info) {
303 return delete_dynamic_record(info, info->lastpos, 0);
304 }
305
306 /* Write record to data-file */
307
write_dynamic_record(MI_INFO * info,uchar * record,ulong reclength)308 static int write_dynamic_record(MI_INFO *info, uchar *record, ulong reclength) {
309 int flag;
310 ulong length;
311 my_off_t filepos;
312 DBUG_TRACE;
313
314 flag = 0;
315
316 /*
317 Check if we have enough room for the new record.
318 First we do simplified check to make usual case faster.
319 Then we do more precise check for the space left.
320 Though it still is not absolutely precise, as
321 we always use MI_MAX_DYN_BLOCK_HEADER while it can be
322 less in the most of the cases.
323 */
324
325 if (unlikely(info->s->base.max_data_file_length -
326 info->state->data_file_length <
327 reclength + MI_MAX_DYN_BLOCK_HEADER)) {
328 if (info->s->base.max_data_file_length - info->state->data_file_length +
329 info->state->empty - info->state->del * MI_MAX_DYN_BLOCK_HEADER <
330 reclength + MI_MAX_DYN_BLOCK_HEADER) {
331 set_my_errno(HA_ERR_RECORD_FILE_FULL);
332 return 1;
333 }
334 }
335
336 do {
337 if (_mi_find_writepos(info, reclength, &filepos, &length)) goto err;
338 if (_mi_write_part_record(
339 info, filepos, length,
340 (info->append_insert_at_end ? HA_OFFSET_ERROR
341 : info->s->state.dellink),
342 &record, &reclength, &flag))
343 goto err;
344 } while (reclength);
345
346 return 0;
347 err:
348 return 1;
349 }
350
351 /* Get a block for data ; The given data-area must be used !! */
352
_mi_find_writepos(MI_INFO * info,ulong reclength,my_off_t * filepos,ulong * length)353 static int _mi_find_writepos(MI_INFO *info, ulong reclength, /* record length */
354 my_off_t *filepos, /* Return file pos */
355 ulong *length) /* length of block at filepos */
356 {
357 MI_BLOCK_INFO block_info;
358 ulong tmp;
359 DBUG_TRACE;
360
361 if (info->s->state.dellink != HA_OFFSET_ERROR &&
362 !info->append_insert_at_end) {
363 /* Deleted blocks exists; Get last used block */
364 *filepos = info->s->state.dellink;
365 block_info.second_read = 0;
366 info->rec_cache.seek_not_done = true;
367 if (!(_mi_get_block_info(&block_info, info->dfile, info->s->state.dellink) &
368 BLOCK_DELETED)) {
369 DBUG_PRINT("error", ("Delete link crashed"));
370 set_my_errno(HA_ERR_WRONG_IN_RECORD);
371 return -1;
372 }
373 info->s->state.dellink = block_info.next_filepos;
374 info->state->del--;
375 info->state->empty -= block_info.block_len;
376 *length = block_info.block_len;
377 } else {
378 /* No deleted blocks; Allocate a new block */
379 *filepos = info->state->data_file_length;
380 if ((tmp = reclength + 3 + (reclength >= (65520 - 3))) <
381 info->s->base.min_block_length)
382 tmp = info->s->base.min_block_length;
383 else
384 tmp = ((tmp + MI_DYN_ALIGN_SIZE - 1) & (~(ulong)(MI_DYN_ALIGN_SIZE - 1)));
385 if (info->state->data_file_length >
386 (info->s->base.max_data_file_length - tmp)) {
387 set_my_errno(HA_ERR_RECORD_FILE_FULL);
388 return -1;
389 }
390 if (tmp > MI_MAX_BLOCK_LENGTH) tmp = MI_MAX_BLOCK_LENGTH;
391 *length = tmp;
392 info->state->data_file_length += tmp;
393 info->s->state.split++;
394 info->update |= HA_STATE_WRITE_AT_END;
395 }
396 return 0;
397 } /* _mi_find_writepos */
398
399 /*
400 Unlink a deleted block from the deleted list.
401 This block will be combined with the preceding or next block to form
402 a big block.
403 */
404
unlink_deleted_block(MI_INFO * info,MI_BLOCK_INFO * block_info)405 static bool unlink_deleted_block(MI_INFO *info, MI_BLOCK_INFO *block_info) {
406 DBUG_TRACE;
407 if (block_info->filepos == info->s->state.dellink) {
408 /* First deleted block; We can just use this ! */
409 info->s->state.dellink = block_info->next_filepos;
410 } else {
411 MI_BLOCK_INFO tmp;
412 tmp.second_read = 0;
413 /* Unlink block from the previous block */
414 if (!(_mi_get_block_info(&tmp, info->dfile, block_info->prev_filepos) &
415 BLOCK_DELETED))
416 return true; /* Something is wrong */
417 mi_sizestore(tmp.header + 4, block_info->next_filepos);
418 if (info->s->file_write(info, tmp.header + 4, 8,
419 block_info->prev_filepos + 4, MYF(MY_NABP)))
420 return true;
421 /* Unlink block from next block */
422 if (block_info->next_filepos != HA_OFFSET_ERROR) {
423 if (!(_mi_get_block_info(&tmp, info->dfile, block_info->next_filepos) &
424 BLOCK_DELETED))
425 return true; /* Something is wrong */
426 mi_sizestore(tmp.header + 12, block_info->prev_filepos);
427 if (info->s->file_write(info, tmp.header + 12, 8,
428 block_info->next_filepos + 12, MYF(MY_NABP)))
429 return true;
430 }
431 }
432 /* We now have one less deleted block */
433 info->state->del--;
434 info->state->empty -= block_info->block_len;
435 info->s->state.split--;
436
437 /*
438 If this was a block that we where accessing through table scan
439 (mi_rrnd() or mi_scan(), then ensure that we skip over this block
440 when doing next mi_rrnd() or mi_scan().
441 */
442 if (info->nextpos == block_info->filepos)
443 info->nextpos += block_info->block_len;
444 return false;
445 }
446
447 /*
448 Add a backward link to delete block
449
450 SYNOPSIS
451 update_backward_delete_link()
452 info MyISAM handler
453 delete_block Position to delete block to update.
454 If this is 'HA_OFFSET_ERROR', nothing will be done
455 filepos Position to block that 'delete_block' should point to
456
457 RETURN
458 0 ok
459 1 error. In this case my_error is set.
460 */
461
update_backward_delete_link(MI_INFO * info,my_off_t delete_block,my_off_t filepos)462 static int update_backward_delete_link(MI_INFO *info, my_off_t delete_block,
463 my_off_t filepos) {
464 MI_BLOCK_INFO block_info;
465 DBUG_TRACE;
466
467 if (delete_block != HA_OFFSET_ERROR) {
468 block_info.second_read = 0;
469 if (_mi_get_block_info(&block_info, info->dfile, delete_block) &
470 BLOCK_DELETED) {
471 uchar buff[8];
472 mi_sizestore(buff, filepos);
473 if (info->s->file_write(info, buff, 8, delete_block + 12, MYF(MY_NABP)))
474 return 1; /* Error on write */
475 } else {
476 set_my_errno(HA_ERR_WRONG_IN_RECORD);
477 return 1; /* Wrong delete link */
478 }
479 }
480 return 0;
481 }
482
483 /* Delete datarecord from database */
484 /* info->rec_cache.seek_not_done is updated in cmp_record */
485
delete_dynamic_record(MI_INFO * info,my_off_t filepos,uint second_read)486 static int delete_dynamic_record(MI_INFO *info, my_off_t filepos,
487 uint second_read) {
488 uint length, b_type;
489 MI_BLOCK_INFO block_info, del_block;
490 int error;
491 bool remove_next_block;
492 DBUG_TRACE;
493
494 /* First add a link from the last block to the new one */
495 error = update_backward_delete_link(info, info->s->state.dellink, filepos);
496
497 block_info.second_read = second_read;
498 do {
499 /* Remove block at 'filepos' */
500 if ((b_type = _mi_get_block_info(&block_info, info->dfile, filepos)) &
501 (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
502 BLOCK_FATAL_ERROR) ||
503 (length = (uint)(block_info.filepos - filepos) + block_info.block_len) <
504 MI_MIN_BLOCK_LENGTH) {
505 set_my_errno(HA_ERR_WRONG_IN_RECORD);
506 return 1;
507 }
508 /* Check if next block is a delete block */
509 del_block.second_read = 0;
510 remove_next_block = false;
511 if (_mi_get_block_info(&del_block, info->dfile, filepos + length) &
512 BLOCK_DELETED &&
513 del_block.block_len + length < MI_DYN_MAX_BLOCK_LENGTH) {
514 /* We can't remove this yet as this block may be the head block */
515 remove_next_block = true;
516 length += del_block.block_len;
517 }
518
519 block_info.header[0] = 0;
520 mi_int3store(block_info.header + 1, length);
521 mi_sizestore(block_info.header + 4, info->s->state.dellink);
522 if (b_type & BLOCK_LAST)
523 memset(block_info.header + 12, 255, 8);
524 else
525 mi_sizestore(block_info.header + 12, block_info.next_filepos);
526 if (info->s->file_write(info, (uchar *)block_info.header, 20, filepos,
527 MYF(MY_NABP)))
528 return 1;
529 info->s->state.dellink = filepos;
530 info->state->del++;
531 info->state->empty += length;
532 filepos = block_info.next_filepos;
533
534 /* Now it's safe to unlink the deleted block directly after this one */
535 if (remove_next_block && unlink_deleted_block(info, &del_block)) error = 1;
536 } while (!(b_type & BLOCK_LAST));
537
538 return error;
539 }
540
541 /* Write a block to datafile */
542
_mi_write_part_record(MI_INFO * info,my_off_t filepos,ulong length,my_off_t next_filepos,uchar ** record,ulong * reclength,int * flag)543 int _mi_write_part_record(MI_INFO *info,
544 my_off_t filepos, /* points at empty block */
545 ulong length, /* length of block */
546 my_off_t next_filepos, /* Next empty block */
547 uchar **record, /* pointer to record ptr */
548 ulong *reclength, /* length of *record */
549 int *flag) /* *flag == 0 if header */
550 {
551 ulong head_length, res_length, extra_length, long_block, del_length;
552 uchar *pos, *record_end;
553 my_off_t next_delete_block;
554 uchar temp[MI_SPLIT_LENGTH + MI_DYN_DELETE_BLOCK_HEADER];
555 DBUG_TRACE;
556
557 next_delete_block = HA_OFFSET_ERROR;
558
559 res_length = extra_length = 0;
560 if (length > *reclength + MI_SPLIT_LENGTH) { /* Splitt big block */
561 res_length = MY_ALIGN(length - *reclength - MI_EXTEND_BLOCK_LENGTH,
562 MI_DYN_ALIGN_SIZE);
563 length -= res_length; /* Use this for first part */
564 }
565 long_block = (length < 65520L && *reclength < 65520L) ? 0 : 1;
566 if (length == *reclength + 3 + long_block) {
567 /* Block is exactly of the right length */
568 temp[0] = (uchar)(1 + *flag) + (uchar)long_block; /* Flag is 0 or 6 */
569 if (long_block) {
570 mi_int3store(temp + 1, *reclength);
571 head_length = 4;
572 } else {
573 mi_int2store(temp + 1, *reclength);
574 head_length = 3;
575 }
576 } else if (length - long_block < *reclength + 4) { /* To short block */
577 if (next_filepos == HA_OFFSET_ERROR)
578 next_filepos = (info->s->state.dellink != HA_OFFSET_ERROR &&
579 !info->append_insert_at_end
580 ? info->s->state.dellink
581 : info->state->data_file_length);
582 if (*flag == 0) /* First block */
583 {
584 if (*reclength > MI_MAX_BLOCK_LENGTH) {
585 head_length = 16;
586 temp[0] = 13;
587 mi_int4store(temp + 1, *reclength);
588 mi_int3store(temp + 5, length - head_length);
589 mi_sizestore((uchar *)temp + 8, next_filepos);
590 } else {
591 head_length = 5 + 8 + long_block * 2;
592 temp[0] = 5 + (uchar)long_block;
593 if (long_block) {
594 mi_int3store(temp + 1, *reclength);
595 mi_int3store(temp + 4, length - head_length);
596 mi_sizestore((uchar *)temp + 7, next_filepos);
597 } else {
598 mi_int2store(temp + 1, *reclength);
599 mi_int2store(temp + 3, length - head_length);
600 mi_sizestore((uchar *)temp + 5, next_filepos);
601 }
602 }
603 } else {
604 head_length = 3 + 8 + long_block;
605 temp[0] = 11 + (uchar)long_block;
606 if (long_block) {
607 mi_int3store(temp + 1, length - head_length);
608 mi_sizestore((uchar *)temp + 4, next_filepos);
609 } else {
610 mi_int2store(temp + 1, length - head_length);
611 mi_sizestore((uchar *)temp + 3, next_filepos);
612 }
613 }
614 } else { /* Block with empty info last */
615 head_length = 4 + long_block;
616 extra_length = length - *reclength - head_length;
617 temp[0] = (uchar)(3 + *flag) + (uchar)long_block; /* 3,4 or 9,10 */
618 if (long_block) {
619 mi_int3store(temp + 1, *reclength);
620 temp[4] = (uchar)(extra_length);
621 } else {
622 mi_int2store(temp + 1, *reclength);
623 temp[3] = (uchar)(extra_length);
624 }
625 length = *reclength + head_length; /* Write only what is needed */
626 }
627 DBUG_DUMP("header", (uchar *)temp, head_length);
628
629 /* Make a long block for one write */
630 record_end = *record + length - head_length;
631 del_length = (res_length ? MI_DYN_DELETE_BLOCK_HEADER : 0);
632 memmove((uchar *)(*record - head_length), (uchar *)temp, head_length);
633 memcpy(temp, record_end, (size_t)(extra_length + del_length));
634 memset(record_end, 0, extra_length);
635
636 if (res_length) {
637 /* Check first if we can join this block with the next one */
638 MI_BLOCK_INFO del_block;
639 my_off_t next_block = filepos + length + extra_length + res_length;
640
641 del_block.second_read = 0;
642 if (next_block < info->state->data_file_length &&
643 info->s->state.dellink != HA_OFFSET_ERROR) {
644 if ((_mi_get_block_info(&del_block, info->dfile, next_block) &
645 BLOCK_DELETED) &&
646 res_length + del_block.block_len < MI_DYN_MAX_BLOCK_LENGTH) {
647 if (unlink_deleted_block(info, &del_block)) goto err;
648 res_length += del_block.block_len;
649 }
650 }
651
652 /* Create a delete link of the last part of the block */
653 pos = record_end + extra_length;
654 pos[0] = '\0';
655 mi_int3store(pos + 1, res_length);
656 mi_sizestore(pos + 4, info->s->state.dellink);
657 memset(pos + 12, 255, 8); /* End link */
658 next_delete_block = info->s->state.dellink;
659 info->s->state.dellink = filepos + length + extra_length;
660 info->state->del++;
661 info->state->empty += res_length;
662 info->s->state.split++;
663 }
664 if (info->opt_flag & WRITE_CACHE_USED &&
665 info->update & HA_STATE_WRITE_AT_END) {
666 if (info->update & HA_STATE_EXTEND_BLOCK) {
667 info->update &= ~HA_STATE_EXTEND_BLOCK;
668 if (my_block_write(&info->rec_cache, (uchar *)*record - head_length,
669 length + extra_length + del_length, filepos))
670 goto err;
671 } else if (my_b_write(&info->rec_cache, (uchar *)*record - head_length,
672 length + extra_length + del_length))
673 goto err;
674 } else {
675 info->rec_cache.seek_not_done = true;
676 if (info->s->file_write(info, (uchar *)*record - head_length,
677 length + extra_length + del_length, filepos,
678 info->s->write_flag))
679 goto err;
680 }
681 memcpy(record_end, temp, (size_t)(extra_length + del_length));
682 *record = record_end;
683 *reclength -= (length - head_length);
684 *flag = 6;
685
686 if (del_length) {
687 /* link the next delete block to this */
688 if (update_backward_delete_link(info, next_delete_block,
689 info->s->state.dellink))
690 goto err;
691 }
692
693 return 0;
694 err:
695 DBUG_PRINT("exit", ("errno: %d", my_errno()));
696 return 1;
697 } /*_mi_write_part_record */
698
699 /* update record from datafile */
700
update_dynamic_record(MI_INFO * info,my_off_t filepos,uchar * record,ulong reclength)701 static int update_dynamic_record(MI_INFO *info, my_off_t filepos, uchar *record,
702 ulong reclength) {
703 int flag;
704 uint error;
705 ulong length;
706 MI_BLOCK_INFO block_info;
707 DBUG_TRACE;
708
709 flag = block_info.second_read = 0;
710 /*
711 Check if we have enough room for the record.
712 First we do simplified check to make usual case faster.
713 Then we do more precise check for the space left.
714 Though it still is not absolutely precise, as
715 we always use MI_MAX_DYN_BLOCK_HEADER while it can be
716 less in the most of the cases.
717 */
718
719 /*
720 compare with just the reclength as we're going
721 to get some space from the old replaced record
722 */
723 if (unlikely(info->s->base.max_data_file_length -
724 info->state->data_file_length <
725 reclength)) {
726 /*
727 let's read the old record's block to find out the length of the
728 old record
729 */
730 if ((error = _mi_get_block_info(&block_info, info->dfile, filepos)) &
731 (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | BLOCK_FATAL_ERROR)) {
732 DBUG_PRINT("error", ("Got wrong block info"));
733 if (!(error & BLOCK_FATAL_ERROR)) set_my_errno(HA_ERR_WRONG_IN_RECORD);
734 goto err;
735 }
736
737 /*
738 if new record isn't longer, we can go on safely
739 */
740 if (block_info.rec_len < reclength) {
741 if (info->s->base.max_data_file_length - info->state->data_file_length +
742 info->state->empty - info->state->del * MI_MAX_DYN_BLOCK_HEADER <
743 reclength - block_info.rec_len + MI_MAX_DYN_BLOCK_HEADER) {
744 set_my_errno(HA_ERR_RECORD_FILE_FULL);
745 goto err;
746 }
747 }
748 block_info.second_read = 0;
749 }
750
751 while (reclength > 0) {
752 if (filepos != info->s->state.dellink) {
753 block_info.next_filepos = HA_OFFSET_ERROR;
754 if ((error = _mi_get_block_info(&block_info, info->dfile, filepos)) &
755 (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
756 BLOCK_FATAL_ERROR)) {
757 DBUG_PRINT("error", ("Got wrong block info"));
758 if (!(error & BLOCK_FATAL_ERROR)) set_my_errno(HA_ERR_WRONG_IN_RECORD);
759 goto err;
760 }
761 length = (ulong)(block_info.filepos - filepos) + block_info.block_len;
762 if (length < reclength) {
763 uint tmp = MY_ALIGN(reclength - length + 3 + (reclength >= 65520L),
764 MI_DYN_ALIGN_SIZE);
765 /* Don't create a block bigger than MI_MAX_BLOCK_LENGTH */
766 tmp = std::min(length + tmp, MI_MAX_BLOCK_LENGTH) - length;
767 /* Check if we can extend this block */
768 if (block_info.filepos + block_info.block_len ==
769 info->state->data_file_length &&
770 info->state->data_file_length <
771 info->s->base.max_data_file_length - tmp) {
772 /* extend file */
773 DBUG_PRINT("info", ("Extending file with %d bytes", tmp));
774 if (info->nextpos == info->state->data_file_length)
775 info->nextpos += tmp;
776 info->state->data_file_length += tmp;
777 info->update |= HA_STATE_WRITE_AT_END | HA_STATE_EXTEND_BLOCK;
778 length += tmp;
779 } else if (length < MI_MAX_BLOCK_LENGTH - MI_MIN_BLOCK_LENGTH) {
780 /*
781 Check if next block is a deleted block
782 Above we have MI_MIN_BLOCK_LENGTH to avoid the problem where
783 the next block is so small it can't be splited which could
784 casue problems
785 */
786
787 MI_BLOCK_INFO del_block;
788 del_block.second_read = 0;
789 if (_mi_get_block_info(&del_block, info->dfile,
790 block_info.filepos + block_info.block_len) &
791 BLOCK_DELETED) {
792 /* Use; Unlink it and extend the current block */
793 DBUG_PRINT("info", ("Extending current block"));
794 if (unlink_deleted_block(info, &del_block)) goto err;
795 if ((length += del_block.block_len) > MI_MAX_BLOCK_LENGTH) {
796 /*
797 New block was too big, link overflow part back to
798 delete list
799 */
800 my_off_t next_pos;
801 ulong rest_length = length - MI_MAX_BLOCK_LENGTH;
802 rest_length = std::max(rest_length, ulong(MI_MIN_BLOCK_LENGTH));
803 next_pos = del_block.filepos + del_block.block_len - rest_length;
804
805 if (update_backward_delete_link(info, info->s->state.dellink,
806 next_pos))
807 return 1;
808
809 /* create delete link for data that didn't fit into the page */
810 del_block.header[0] = 0;
811 mi_int3store(del_block.header + 1, rest_length);
812 mi_sizestore(del_block.header + 4, info->s->state.dellink);
813 memset(del_block.header + 12, 255, 8);
814 if (info->s->file_write(info, (uchar *)del_block.header, 20,
815 next_pos, MYF(MY_NABP)))
816 return 1;
817 info->s->state.dellink = next_pos;
818 info->s->state.split++;
819 info->state->del++;
820 info->state->empty += rest_length;
821 length -= rest_length;
822 }
823 }
824 }
825 }
826 } else {
827 if (_mi_find_writepos(info, reclength, &filepos, &length)) goto err;
828 }
829 if (_mi_write_part_record(info, filepos, length, block_info.next_filepos,
830 &record, &reclength, &flag))
831 goto err;
832 if ((filepos = block_info.next_filepos) == HA_OFFSET_ERROR) {
833 /* Start writing data on deleted blocks */
834 filepos = info->s->state.dellink;
835 }
836 }
837
838 if (block_info.next_filepos != HA_OFFSET_ERROR) {
839 /*
840 delete_dynamic_record() may change data file position.
841 IO cache must be notified as it may still have cached
842 data, which has to be flushed later.
843 */
844 info->rec_cache.seek_not_done = true;
845 if (delete_dynamic_record(info, block_info.next_filepos, 1)) goto err;
846 }
847 return 0;
848 err:
849 return 1;
850 }
851
852 /* Pack a record. Return new reclength */
853
_mi_rec_pack(MI_INFO * info,uchar * to,const uchar * from)854 uint _mi_rec_pack(MI_INFO *info, uchar *to, const uchar *from) {
855 uint length, new_length, flag, bit, i;
856 const uchar *pos, *end, *startpos;
857 uchar *packpos;
858 enum en_fieldtype type;
859 MI_COLUMNDEF *rec;
860 MI_BLOB *blob;
861 DBUG_TRACE;
862
863 flag = 0;
864 bit = 1;
865 startpos = packpos = to;
866 to += info->s->base.pack_bits;
867 blob = info->blobs;
868 rec = info->s->rec;
869
870 for (i = info->s->base.fields; i-- > 0; from += length, rec++) {
871 length = (uint)rec->length;
872 if ((type = (enum en_fieldtype)rec->type) != FIELD_NORMAL) {
873 if (type == FIELD_BLOB) {
874 if (!blob->length)
875 flag |= bit;
876 else {
877 char *temp_pos;
878 size_t tmp_length = length - portable_sizeof_char_ptr;
879 memcpy((uchar *)to, from, tmp_length);
880 memcpy(&temp_pos, from + tmp_length, sizeof(char *));
881 memcpy(to + tmp_length, temp_pos, (size_t)blob->length);
882 to += tmp_length + blob->length;
883 }
884 blob++;
885 } else if (type == FIELD_SKIP_ZERO) {
886 if (memcmp(from, zero_string, length) == 0)
887 flag |= bit;
888 else {
889 memcpy((uchar *)to, from, (size_t)length);
890 to += length;
891 }
892 } else if (type == FIELD_SKIP_ENDSPACE || type == FIELD_SKIP_PRESPACE) {
893 pos = from;
894 end = from + length;
895 if (type == FIELD_SKIP_ENDSPACE) { /* Pack trailing spaces */
896 while (end > from && *(end - 1) == ' ') end--;
897 } else { /* Pack pref-spaces */
898 while (pos < end && *pos == ' ') pos++;
899 }
900 new_length = (uint)(end - pos);
901 if (new_length + 1 + (rec->length > 255 && new_length > 127) < length) {
902 if (rec->length > 255 && new_length > 127) {
903 to[0] = (uchar)((new_length & 127) + 128);
904 to[1] = (uchar)(new_length >> 7);
905 to += 2;
906 } else
907 *to++ = (uchar)new_length;
908 memcpy((uchar *)to, pos, (size_t)new_length);
909 to += new_length;
910 flag |= bit;
911 } else {
912 memcpy(to, from, (size_t)length);
913 to += length;
914 }
915 } else if (type == FIELD_VARCHAR) {
916 uint pack_length = HA_VARCHAR_PACKLENGTH(rec->length - 1);
917 uint tmp_length;
918 if (pack_length == 1) {
919 tmp_length = (uint)*from;
920 *to++ = *from;
921 } else {
922 tmp_length = uint2korr(from);
923 store_key_length_inc(to, tmp_length);
924 }
925 memcpy(to, from + pack_length, tmp_length);
926 to += tmp_length;
927 continue;
928 } else {
929 memcpy(to, from, (size_t)length);
930 to += length;
931 continue; /* Normal field */
932 }
933 if ((bit = bit << 1) >= 256) {
934 *packpos++ = (uchar)flag;
935 bit = 1;
936 flag = 0;
937 }
938 } else {
939 memcpy(to, from, (size_t)length);
940 to += length;
941 }
942 }
943 if (bit != 1) *packpos = (uchar)flag;
944 if (info->s->calc_checksum) *to++ = (uchar)info->checksum;
945 DBUG_PRINT("exit", ("packed length: %d", (int)(to - startpos)));
946 return (uint)(to - startpos);
947 } /* _mi_rec_pack */
948
949 /*
950 Check if a record was correctly packed. Used only by myisamchk
951 Returns 0 if record is ok.
952 */
953
_mi_rec_check(MI_INFO * info,const uchar * record,uchar * rec_buff,ulong packed_length,bool with_checksum)954 bool _mi_rec_check(MI_INFO *info, const uchar *record, uchar *rec_buff,
955 ulong packed_length, bool with_checksum) {
956 uint length, new_length, flag, bit, i;
957 const uchar *pos, *end, *packpos, *to;
958 enum en_fieldtype type;
959 MI_COLUMNDEF *rec;
960 DBUG_TRACE;
961
962 packpos = rec_buff;
963 to = rec_buff + info->s->base.pack_bits;
964 rec = info->s->rec;
965 flag = *packpos;
966 bit = 1;
967
968 for (i = info->s->base.fields; i-- > 0; record += length, rec++) {
969 length = (uint)rec->length;
970 if ((type = (enum en_fieldtype)rec->type) != FIELD_NORMAL) {
971 if (type == FIELD_BLOB) {
972 uint blob_length =
973 _mi_calc_blob_length(length - portable_sizeof_char_ptr, record);
974 if (!blob_length && !(flag & bit)) goto err;
975 if (blob_length) to += length - portable_sizeof_char_ptr + blob_length;
976 } else if (type == FIELD_SKIP_ZERO) {
977 if (memcmp(record, zero_string, length) == 0) {
978 if (!(flag & bit)) goto err;
979 } else
980 to += length;
981 } else if (type == FIELD_SKIP_ENDSPACE || type == FIELD_SKIP_PRESPACE) {
982 pos = record;
983 end = record + length;
984 if (type == FIELD_SKIP_ENDSPACE) { /* Pack trailing spaces */
985 while (end > record && *(end - 1) == ' ') end--;
986 } else { /* Pack pre-spaces */
987 while (pos < end && *pos == ' ') pos++;
988 }
989 new_length = (uint)(end - pos);
990 if (new_length + 1 + (rec->length > 255 && new_length > 127) < length) {
991 if (!(flag & bit)) goto err;
992 if (rec->length > 255 && new_length > 127) {
993 /* purecov: begin inspected */
994 if (to[0] != (uchar)((new_length & 127) + 128) ||
995 to[1] != (uchar)(new_length >> 7))
996 goto err;
997 to += 2;
998 /* purecov: end */
999 } else if (*to++ != (uchar)new_length)
1000 goto err;
1001 to += new_length;
1002 } else
1003 to += length;
1004 } else if (type == FIELD_VARCHAR) {
1005 uint pack_length = HA_VARCHAR_PACKLENGTH(rec->length - 1);
1006 uint tmp_length;
1007 if (pack_length == 1) {
1008 tmp_length = (uint)*record;
1009 to += 1 + tmp_length;
1010 continue;
1011 } else {
1012 tmp_length = uint2korr(record);
1013 to += get_pack_length(tmp_length) + tmp_length;
1014 }
1015 continue;
1016 } else {
1017 to += length;
1018 continue; /* Normal field */
1019 }
1020 if ((bit = bit << 1) >= 256) {
1021 flag = *++packpos;
1022 bit = 1;
1023 }
1024 } else
1025 to += length;
1026 }
1027 if (packed_length !=
1028 (uint)(to - rec_buff) + (info->s->calc_checksum != nullptr) ||
1029 (bit != 1 && (flag & ~(bit - 1))))
1030 goto err;
1031 if (with_checksum && ((uchar)info->checksum != (uchar)*to)) {
1032 DBUG_PRINT("error", ("wrong checksum for row"));
1033 goto err;
1034 }
1035 return false;
1036
1037 err:
1038 return true;
1039 }
1040
1041 /* Unpacks a record */
1042 /* Returns -1 and my_errno =HA_ERR_RECORD_DELETED if reclength isn't */
1043 /* right. Returns reclength (>0) if ok */
1044
_mi_rec_unpack(MI_INFO * info,uchar * to,const uchar * from,ulong found_length)1045 ulong _mi_rec_unpack(MI_INFO *info, uchar *to, const uchar *from,
1046 ulong found_length) {
1047 uint flag, bit, length, rec_length, min_pack_length;
1048 enum en_fieldtype type;
1049 uchar *to_end;
1050 MI_COLUMNDEF *rec, *end_field;
1051 DBUG_TRACE;
1052
1053 to_end = to + info->s->base.reclength;
1054 const uchar *from_end = from + found_length;
1055 flag = (uchar)*from;
1056 bit = 1;
1057 const uchar *packpos = from;
1058 if (found_length < info->s->base.min_pack_length) goto err;
1059 from += info->s->base.pack_bits;
1060 min_pack_length = info->s->base.min_pack_length - info->s->base.pack_bits;
1061
1062 for (rec = info->s->rec, end_field = rec + info->s->base.fields;
1063 rec < end_field; to += rec_length, rec++) {
1064 rec_length = rec->length;
1065 if ((type = (enum en_fieldtype)rec->type) != FIELD_NORMAL &&
1066 (type != FIELD_CHECK)) {
1067 if (type == FIELD_VARCHAR) {
1068 uint pack_length = HA_VARCHAR_PACKLENGTH(rec_length - 1);
1069 if (pack_length == 1) {
1070 length = (uint)*from;
1071 if (length > rec_length - 1) goto err;
1072 *to = *from++;
1073 } else {
1074 length = get_key_length(&from);
1075 if (length > rec_length - 2) goto err;
1076 int2store(to, length);
1077 }
1078 if (from + length > from_end) goto err;
1079 memcpy(to + pack_length, from, length);
1080 from += length;
1081 min_pack_length--;
1082 continue;
1083 }
1084 if (flag & bit) {
1085 if (type == FIELD_BLOB || type == FIELD_SKIP_ZERO)
1086 memset(to, 0, rec_length);
1087 else if (type == FIELD_SKIP_ENDSPACE || type == FIELD_SKIP_PRESPACE) {
1088 if (rec->length > 255 && *from & 128) {
1089 if (from + 1 >= from_end) goto err;
1090 length = (*from & 127) + ((uint)(uchar) * (from + 1) << 7);
1091 from += 2;
1092 } else {
1093 if (from == from_end) goto err;
1094 length = (uchar)*from++;
1095 }
1096 min_pack_length--;
1097 if (length >= rec_length ||
1098 min_pack_length + length > (uint)(from_end - from))
1099 goto err;
1100 if (type == FIELD_SKIP_ENDSPACE) {
1101 memcpy(to, from, (size_t)length);
1102 memset(to + length, ' ', rec_length - length);
1103 } else {
1104 memset(to, ' ', rec_length - length);
1105 memcpy(to + rec_length - length, from, (size_t)length);
1106 }
1107 from += length;
1108 }
1109 } else if (type == FIELD_BLOB) {
1110 uint size_length = rec_length - portable_sizeof_char_ptr;
1111 ulong blob_length = _mi_calc_blob_length(size_length, from);
1112 ulong from_left = (ulong)(from_end - from);
1113 if (from_left < size_length || from_left - size_length < blob_length ||
1114 from_left - size_length - blob_length < min_pack_length)
1115 goto err;
1116 memcpy(to, from, (size_t)size_length);
1117 from += size_length;
1118 memcpy(to + size_length, &from, sizeof(char *));
1119 from += blob_length;
1120 } else {
1121 if (type == FIELD_SKIP_ENDSPACE || type == FIELD_SKIP_PRESPACE)
1122 min_pack_length--;
1123 if (min_pack_length + rec_length > (uint)(from_end - from)) goto err;
1124 memcpy(to, from, (size_t)rec_length);
1125 from += rec_length;
1126 }
1127 if ((bit = bit << 1) >= 256) {
1128 flag = (uchar) * ++packpos;
1129 bit = 1;
1130 }
1131 } else {
1132 if (min_pack_length > (uint)(from_end - from)) goto err;
1133 min_pack_length -= rec_length;
1134 memcpy(to, from, (size_t)rec_length);
1135 from += rec_length;
1136 }
1137 }
1138 if (info->s->calc_checksum) from++;
1139 if (to == to_end && from == from_end && (bit == 1 || !(flag & ~(bit - 1))))
1140 return found_length;
1141
1142 err:
1143 set_my_errno(HA_ERR_WRONG_IN_RECORD);
1144 DBUG_PRINT("error", ("to_end: %p -> %p from_end: %p -> %p", to, to_end, from,
1145 from_end));
1146 DBUG_DUMP("from", (uchar *)info->rec_buff, info->s->base.min_pack_length);
1147 return MY_FILE_ERROR;
1148 } /* _mi_rec_unpack */
1149
1150 /* Calc length of blob. Update info in blobs->length */
1151
_my_calc_total_blob_length(MI_INFO * info,const uchar * record)1152 ulong _my_calc_total_blob_length(MI_INFO *info, const uchar *record) {
1153 ulong length;
1154 MI_BLOB *blob, *end;
1155
1156 for (length = 0, blob = info->blobs, end = blob + info->s->base.blobs;
1157 blob != end; blob++) {
1158 blob->length =
1159 _mi_calc_blob_length(blob->pack_length, record + blob->offset);
1160 length += blob->length;
1161 }
1162 return length;
1163 }
1164
_mi_calc_blob_length(uint length,const uchar * pos)1165 ulong _mi_calc_blob_length(uint length, const uchar *pos) {
1166 switch (length) {
1167 case 1:
1168 return (uint)(uchar)*pos;
1169 case 2:
1170 return (uint)uint2korr(pos);
1171 case 3:
1172 return uint3korr(pos);
1173 case 4:
1174 return uint4korr(pos);
1175 default:
1176 break;
1177 }
1178 return 0; /* Impossible */
1179 }
1180
_mi_store_blob_length(uchar * pos,uint pack_length,uint length)1181 void _mi_store_blob_length(uchar *pos, uint pack_length, uint length) {
1182 switch (pack_length) {
1183 case 1:
1184 *pos = (uchar)length;
1185 break;
1186 case 2:
1187 int2store(pos, length);
1188 break;
1189 case 3:
1190 int3store(pos, length);
1191 break;
1192 case 4:
1193 int4store(pos, length);
1194 default:
1195 break;
1196 }
1197 return;
1198 }
1199
1200 /*
1201 Read record from datafile.
1202
1203 SYNOPSIS
1204 _mi_read_dynamic_record()
1205 info MI_INFO pointer to table.
1206 filepos From where to read the record.
1207 buf Destination for record.
1208
1209 NOTE
1210
1211 If a write buffer is active, it needs to be flushed if its contents
1212 intersects with the record to read. We always check if the position
1213 of the first byte of the write buffer is lower than the position
1214 past the last byte to read. In theory this is also true if the write
1215 buffer is completely below the read segment. That is, if there is no
1216 intersection. But this case is unusual. We flush anyway. Only if the
1217 first byte in the write buffer is above the last byte to read, we do
1218 not flush.
1219
1220 A dynamic record may need several reads. So this check must be done
1221 before every read. Reading a dynamic record starts with reading the
1222 block header. If the record does not fit into the free space of the
1223 header, the block may be longer than the header. In this case a
1224 second read is necessary. These one or two reads repeat for every
1225 part of the record.
1226
1227 RETURN
1228 0 OK
1229 -1 Error
1230 */
1231
_mi_read_dynamic_record(MI_INFO * info,my_off_t filepos,uchar * buf)1232 int _mi_read_dynamic_record(MI_INFO *info, my_off_t filepos, uchar *buf) {
1233 int block_of_record;
1234 uint b_type, left_length = 0;
1235 uchar *to = nullptr;
1236 MI_BLOCK_INFO block_info;
1237 File file;
1238 DBUG_TRACE;
1239
1240 if (filepos != HA_OFFSET_ERROR) {
1241 file = info->dfile;
1242 block_of_record = 0; /* First block of record is numbered as zero. */
1243 block_info.second_read = 0;
1244 do {
1245 /* A corrupted table can have wrong pointers. (Bug# 19835) */
1246 if (filepos == HA_OFFSET_ERROR) goto panic;
1247 if (info->opt_flag & WRITE_CACHE_USED &&
1248 info->rec_cache.pos_in_file < filepos + MI_BLOCK_INFO_HEADER_LENGTH &&
1249 flush_io_cache(&info->rec_cache))
1250 goto err;
1251 info->rec_cache.seek_not_done = true;
1252 if ((b_type = _mi_get_block_info(&block_info, file, filepos)) &
1253 (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
1254 BLOCK_FATAL_ERROR)) {
1255 if (b_type & (BLOCK_SYNC_ERROR | BLOCK_DELETED))
1256 set_my_errno(HA_ERR_RECORD_DELETED);
1257 goto err;
1258 }
1259 if (block_of_record++ == 0) /* First block */
1260 {
1261 if (block_info.rec_len > (uint)info->s->base.max_pack_length)
1262 goto panic;
1263 if (info->s->base.blobs) {
1264 if (!(to = mi_alloc_rec_buff(info, block_info.rec_len,
1265 &info->rec_buff)))
1266 goto err;
1267 } else
1268 to = info->rec_buff;
1269 left_length = block_info.rec_len;
1270 }
1271 if (left_length < block_info.data_len || !block_info.data_len)
1272 goto panic; /* Wrong linked record */
1273 /* copy information that is already read */
1274 {
1275 uint offset = (uint)(block_info.filepos - filepos);
1276 uint prefetch_len = (sizeof(block_info.header) - offset);
1277 filepos += sizeof(block_info.header);
1278
1279 if (prefetch_len > block_info.data_len)
1280 prefetch_len = block_info.data_len;
1281 if (prefetch_len) {
1282 memcpy((uchar *)to, block_info.header + offset, prefetch_len);
1283 block_info.data_len -= prefetch_len;
1284 left_length -= prefetch_len;
1285 to += prefetch_len;
1286 }
1287 }
1288 /* read rest of record from file */
1289 if (block_info.data_len) {
1290 if (info->opt_flag & WRITE_CACHE_USED &&
1291 info->rec_cache.pos_in_file < filepos + block_info.data_len &&
1292 flush_io_cache(&info->rec_cache))
1293 goto err;
1294 /*
1295 What a pity that this method is not called 'file_pread' and that
1296 there is no equivalent without seeking. We are at the right
1297 position already. :(
1298 */
1299 if (info->s->file_read(info, (uchar *)to, block_info.data_len, filepos,
1300 MYF(MY_NABP)))
1301 goto panic;
1302 left_length -= block_info.data_len;
1303 to += block_info.data_len;
1304 }
1305 filepos = block_info.next_filepos;
1306 } while (left_length);
1307
1308 info->update |= HA_STATE_AKTIV; /* We have a aktive record */
1309 fast_mi_writeinfo(info);
1310 return _mi_rec_unpack(info, buf, info->rec_buff, block_info.rec_len) !=
1311 MY_FILE_ERROR
1312 ? 0
1313 : -1;
1314 }
1315 fast_mi_writeinfo(info);
1316 return -1; /* Wrong data to read */
1317
1318 panic:
1319 set_my_errno(HA_ERR_WRONG_IN_RECORD);
1320 err:
1321 (void)_mi_writeinfo(info, 0);
1322 return -1;
1323 }
1324
1325 /* compare unique constraint between stored rows */
1326
_mi_cmp_dynamic_unique(MI_INFO * info,MI_UNIQUEDEF * def,const uchar * record,my_off_t pos)1327 int _mi_cmp_dynamic_unique(MI_INFO *info, MI_UNIQUEDEF *def,
1328 const uchar *record, my_off_t pos) {
1329 uchar *rec_buff, *old_record;
1330 int error;
1331 DBUG_TRACE;
1332
1333 if (!(old_record = (uchar *)my_malloc(mi_key_memory_record_buffer,
1334 info->s->base.reclength, MYF(0))))
1335 return 1;
1336
1337 /* Don't let the compare destroy blobs that may be in use */
1338 rec_buff = info->rec_buff;
1339 if (info->s->base.blobs) info->rec_buff = nullptr;
1340 error = _mi_read_dynamic_record(info, pos, old_record);
1341 if (!error)
1342 error = mi_unique_comp(def, record, old_record, def->null_are_equal);
1343 if (info->s->base.blobs) {
1344 my_free(mi_get_rec_buff_ptr(info, info->rec_buff));
1345 info->rec_buff = rec_buff;
1346 }
1347 my_free(old_record);
1348 return error;
1349 }
1350
1351 /* Compare of record one disk with packed record in memory */
1352
_mi_cmp_dynamic_record(MI_INFO * info,const uchar * record)1353 int _mi_cmp_dynamic_record(MI_INFO *info, const uchar *record) {
1354 uint flag, reclength, b_type;
1355 my_off_t filepos;
1356 uchar *buffer;
1357 MI_BLOCK_INFO block_info;
1358 DBUG_TRACE;
1359
1360 if (info->opt_flag & WRITE_CACHE_USED) {
1361 info->update &= ~(HA_STATE_WRITE_AT_END | HA_STATE_EXTEND_BLOCK);
1362 if (flush_io_cache(&info->rec_cache)) return -1;
1363 }
1364 info->rec_cache.seek_not_done = true;
1365
1366 /* If nobody have touched the database we don't have to test rec */
1367
1368 buffer = info->rec_buff;
1369 if ((info->opt_flag & READ_CHECK_USED)) { /* If check isn't disabled */
1370 if (info->s->base.blobs) {
1371 if (!(buffer =
1372 (uchar *)my_malloc(mi_key_memory_record_buffer,
1373 info->s->base.pack_reclength +
1374 _my_calc_total_blob_length(info, record),
1375 MYF(0))))
1376 return -1;
1377 }
1378 reclength = _mi_rec_pack(info, buffer, record);
1379 record = buffer;
1380
1381 filepos = info->lastpos;
1382 flag = block_info.second_read = 0;
1383 block_info.next_filepos = filepos;
1384 while (reclength > 0) {
1385 if ((b_type = _mi_get_block_info(&block_info, info->dfile,
1386 block_info.next_filepos)) &
1387 (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
1388 BLOCK_FATAL_ERROR)) {
1389 if (b_type & (BLOCK_SYNC_ERROR | BLOCK_DELETED))
1390 set_my_errno(HA_ERR_RECORD_CHANGED);
1391 goto err;
1392 }
1393 if (flag == 0) /* First block */
1394 {
1395 flag = 1;
1396 if (reclength != block_info.rec_len) {
1397 set_my_errno(HA_ERR_RECORD_CHANGED);
1398 goto err;
1399 }
1400 } else if (reclength < block_info.data_len) {
1401 set_my_errno(HA_ERR_WRONG_IN_RECORD);
1402 goto err;
1403 }
1404 reclength -= block_info.data_len;
1405 if (_mi_cmp_buffer(info->dfile, record, block_info.filepos,
1406 block_info.data_len)) {
1407 set_my_errno(HA_ERR_RECORD_CHANGED);
1408 goto err;
1409 }
1410 flag = 1;
1411 record += block_info.data_len;
1412 }
1413 }
1414 set_my_errno(0);
1415 err:
1416 if (buffer != info->rec_buff) my_free((uchar *)buffer);
1417 return my_errno();
1418 }
1419
1420 /* Compare file to buffert */
1421
_mi_cmp_buffer(File file,const uchar * buff,my_off_t filepos,uint length)1422 static int _mi_cmp_buffer(File file, const uchar *buff, my_off_t filepos,
1423 uint length) {
1424 uint next_length;
1425 uchar temp_buff[IO_SIZE * 2];
1426 DBUG_TRACE;
1427
1428 next_length = IO_SIZE * 2 - (uint)(filepos & (IO_SIZE - 1));
1429
1430 while (length > IO_SIZE * 2) {
1431 if (mysql_file_pread(file, temp_buff, next_length, filepos, MYF(MY_NABP)) ||
1432 memcmp(buff, temp_buff, next_length))
1433 goto err;
1434 filepos += next_length;
1435 buff += next_length;
1436 length -= next_length;
1437 next_length = IO_SIZE * 2;
1438 }
1439 if (mysql_file_pread(file, temp_buff, length, filepos, MYF(MY_NABP)))
1440 goto err;
1441 return memcmp(buff, temp_buff, length);
1442 err:
1443 return 1;
1444 }
1445
1446 /*
1447 Read record from datafile.
1448
1449 SYNOPSIS
1450 _mi_read_rnd_dynamic_record()
1451 info MI_INFO pointer to table.
1452 buf Destination for record.
1453 filepos From where to read the record.
1454 skip_deleted_blocks If to repeat reading until a non-deleted
1455 record is found.
1456
1457 NOTE
1458
1459 If a write buffer is active, it needs to be flushed if its contents
1460 intersects with the record to read. We always check if the position
1461 of the first byte of the write buffer is lower than the position
1462 past the last byte to read. In theory this is also true if the write
1463 buffer is completely below the read segment. That is, if there is no
1464 intersection. But this case is unusual. We flush anyway. Only if the
1465 first byte in the write buffer is above the last byte to read, we do
1466 not flush.
1467
1468 A dynamic record may need several reads. So this check must be done
1469 before every read. Reading a dynamic record starts with reading the
1470 block header. If the record does not fit into the free space of the
1471 header, the block may be longer than the header. In this case a
1472 second read is necessary. These one or two reads repeat for every
1473 part of the record.
1474
1475 RETURN
1476 0 OK
1477 != 0 Error
1478 */
1479
_mi_read_rnd_dynamic_record(MI_INFO * info,uchar * buf,my_off_t filepos,bool skip_deleted_blocks)1480 int _mi_read_rnd_dynamic_record(MI_INFO *info, uchar *buf, my_off_t filepos,
1481 bool skip_deleted_blocks) {
1482 int block_of_record, info_read, save_errno;
1483 uint left_len, b_type;
1484 uchar *to = nullptr;
1485 MI_BLOCK_INFO block_info;
1486 MYISAM_SHARE *share = info->s;
1487 DBUG_TRACE;
1488
1489 info_read = 0;
1490
1491 DBUG_EXECUTE_IF("catch_file_offset_deviation", {
1492 if (filepos) return HA_ERR_RECORD_DELETED;
1493 });
1494
1495 if (info->lock_type == F_UNLCK) {
1496 if (share->tot_locks == 0) {
1497 if (my_lock(share->kfile, F_RDLCK,
1498 MYF(MY_SEEK_NOT_DONE) | info->lock_wait))
1499 return my_errno();
1500 }
1501 } else
1502 info_read = 1; /* memory-keyinfoblock is ok */
1503
1504 block_of_record = 0; /* First block of record is numbered as zero. */
1505 block_info.second_read = 0;
1506 left_len = 1;
1507 do {
1508 if (filepos >= info->state->data_file_length) {
1509 if (!info_read) { /* Check if changed */
1510 info_read = 1;
1511 info->rec_cache.seek_not_done = true;
1512 if (mi_state_info_read_dsk(share->kfile, &share->state, true))
1513 goto panic;
1514 }
1515 if (filepos >= info->state->data_file_length) {
1516 set_my_errno(HA_ERR_END_OF_FILE);
1517 goto err;
1518 }
1519 }
1520 if (info->opt_flag & READ_CACHE_USED) {
1521 if (_mi_read_cache(
1522 &info->rec_cache, (uchar *)block_info.header, filepos,
1523 sizeof(block_info.header),
1524 (!block_of_record && skip_deleted_blocks ? READING_NEXT : 0) |
1525 READING_HEADER))
1526 goto panic;
1527 b_type = _mi_get_block_info(&block_info, -1, filepos);
1528 } else {
1529 if (info->opt_flag & WRITE_CACHE_USED &&
1530 info->rec_cache.pos_in_file < filepos + MI_BLOCK_INFO_HEADER_LENGTH &&
1531 flush_io_cache(&info->rec_cache))
1532 return my_errno();
1533 info->rec_cache.seek_not_done = true;
1534 b_type = _mi_get_block_info(&block_info, info->dfile, filepos);
1535 }
1536
1537 if (b_type &
1538 (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | BLOCK_FATAL_ERROR)) {
1539 if ((b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR)) &&
1540 skip_deleted_blocks) {
1541 filepos = block_info.filepos + block_info.block_len;
1542 block_info.second_read = 0;
1543 continue; /* Search after next_record */
1544 }
1545 if (b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR)) {
1546 /*
1547 If we're not on the first block of a record and
1548 the block is marked as deleted or out of sync,
1549 something's gone wrong: the record is damaged.
1550 */
1551 if (block_of_record != 0) goto panic;
1552
1553 set_my_errno(HA_ERR_RECORD_DELETED);
1554 info->lastpos = block_info.filepos;
1555 info->nextpos = block_info.filepos + block_info.block_len;
1556 }
1557 goto err;
1558 }
1559 if (block_of_record == 0) /* First block */
1560 {
1561 if (block_info.rec_len > (uint)share->base.max_pack_length) goto panic;
1562 info->lastpos = filepos;
1563 if (share->base.blobs) {
1564 if (!(to =
1565 mi_alloc_rec_buff(info, block_info.rec_len, &info->rec_buff)))
1566 goto err;
1567 } else
1568 to = info->rec_buff;
1569 left_len = block_info.rec_len;
1570 }
1571 if (left_len < block_info.data_len) goto panic; /* Wrong linked record */
1572
1573 /* copy information that is already read */
1574 {
1575 uint offset = (uint)(block_info.filepos - filepos);
1576 uint tmp_length = (sizeof(block_info.header) - offset);
1577 filepos = block_info.filepos;
1578
1579 if (tmp_length > block_info.data_len) tmp_length = block_info.data_len;
1580 if (tmp_length) {
1581 memcpy((uchar *)to, block_info.header + offset, tmp_length);
1582 block_info.data_len -= tmp_length;
1583 left_len -= tmp_length;
1584 to += tmp_length;
1585 filepos += tmp_length;
1586 }
1587 }
1588 /* read rest of record from file */
1589 if (block_info.data_len) {
1590 if (info->opt_flag & READ_CACHE_USED) {
1591 if (_mi_read_cache(
1592 &info->rec_cache, (uchar *)to, filepos, block_info.data_len,
1593 (!block_of_record && skip_deleted_blocks) ? READING_NEXT : 0))
1594 goto panic;
1595 } else {
1596 if (info->opt_flag & WRITE_CACHE_USED &&
1597 info->rec_cache.pos_in_file <
1598 block_info.filepos + block_info.data_len &&
1599 flush_io_cache(&info->rec_cache))
1600 goto err;
1601 /* mysql_file_seek(info->dfile, filepos, MY_SEEK_SET, MYF(0)); */
1602 if (mysql_file_read(info->dfile, (uchar *)to, block_info.data_len,
1603 MYF(MY_NABP))) {
1604 if (my_errno() == -1)
1605 set_my_errno(HA_ERR_WRONG_IN_RECORD); /* Unexpected end of file */
1606 goto err;
1607 }
1608 }
1609 }
1610 /*
1611 Increment block-of-record counter. If it was the first block,
1612 remember the position behind the block for the next call.
1613 */
1614 if (block_of_record++ == 0) {
1615 info->nextpos = block_info.filepos + block_info.block_len;
1616 skip_deleted_blocks = false;
1617 }
1618 left_len -= block_info.data_len;
1619 to += block_info.data_len;
1620 filepos = block_info.next_filepos;
1621 } while (left_len);
1622
1623 info->update |= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED;
1624 fast_mi_writeinfo(info);
1625 if (_mi_rec_unpack(info, buf, info->rec_buff, block_info.rec_len) !=
1626 MY_FILE_ERROR)
1627 return 0;
1628 return my_errno(); /* Wrong record */
1629
1630 panic:
1631 set_my_errno(HA_ERR_WRONG_IN_RECORD); /* Something is fatal wrong */
1632 err:
1633 save_errno = my_errno();
1634 (void)_mi_writeinfo(info, 0);
1635 set_my_errno(save_errno);
1636 return save_errno;
1637 }
1638
1639 /* Read and process header from a dynamic-record-file */
1640
_mi_get_block_info(MI_BLOCK_INFO * info,File file,my_off_t filepos)1641 uint _mi_get_block_info(MI_BLOCK_INFO *info, File file, my_off_t filepos) {
1642 uint return_val = 0;
1643 uchar *header = info->header;
1644
1645 if (file >= 0) {
1646 /*
1647 We do not use mysql_file_pread() here because we want to have the file
1648 pointer set to the end of the header after this function.
1649 mysql_file_pread() may leave the file pointer untouched.
1650 */
1651 mysql_file_seek(file, filepos, MY_SEEK_SET, MYF(0));
1652 if (mysql_file_read(file, header, sizeof(info->header), MYF(0)) !=
1653 sizeof(info->header))
1654 goto err;
1655 }
1656 DBUG_DUMP("header", header, MI_BLOCK_INFO_HEADER_LENGTH);
1657 if (info->second_read) {
1658 if (info->header[0] <= 6 || info->header[0] == 13)
1659 return_val = BLOCK_SYNC_ERROR;
1660 } else {
1661 if (info->header[0] > 6 && info->header[0] != 13)
1662 return_val = BLOCK_SYNC_ERROR;
1663 }
1664 info->next_filepos = HA_OFFSET_ERROR; /* Dummy if no next block */
1665
1666 switch (info->header[0]) {
1667 case 0:
1668 if ((info->block_len = (uint)mi_uint3korr(header + 1)) <
1669 MI_MIN_BLOCK_LENGTH ||
1670 (info->block_len & (MI_DYN_ALIGN_SIZE - 1)))
1671 goto err;
1672 info->filepos = filepos;
1673 info->next_filepos = mi_sizekorr(header + 4);
1674 info->prev_filepos = mi_sizekorr(header + 12);
1675 return return_val | BLOCK_DELETED; /* Deleted block */
1676
1677 case 1:
1678 info->rec_len = info->data_len = info->block_len =
1679 mi_uint2korr(header + 1);
1680 info->filepos = filepos + 3;
1681 return return_val | BLOCK_FIRST | BLOCK_LAST;
1682 case 2:
1683 info->rec_len = info->data_len = info->block_len =
1684 mi_uint3korr(header + 1);
1685 info->filepos = filepos + 4;
1686 return return_val | BLOCK_FIRST | BLOCK_LAST;
1687
1688 case 13:
1689 info->rec_len = mi_uint4korr(header + 1);
1690 info->block_len = info->data_len = mi_uint3korr(header + 5);
1691 info->next_filepos = mi_sizekorr(header + 8);
1692 info->second_read = 1;
1693 info->filepos = filepos + 16;
1694 return return_val | BLOCK_FIRST;
1695
1696 case 3:
1697 info->rec_len = info->data_len = mi_uint2korr(header + 1);
1698 info->block_len = info->rec_len + (uint)header[3];
1699 info->filepos = filepos + 4;
1700 return return_val | BLOCK_FIRST | BLOCK_LAST;
1701 case 4:
1702 info->rec_len = info->data_len = mi_uint3korr(header + 1);
1703 info->block_len = info->rec_len + (uint)header[4];
1704 info->filepos = filepos + 5;
1705 return return_val | BLOCK_FIRST | BLOCK_LAST;
1706
1707 case 5:
1708 info->rec_len = mi_uint2korr(header + 1);
1709 info->block_len = info->data_len = mi_uint2korr(header + 3);
1710 info->next_filepos = mi_sizekorr(header + 5);
1711 info->second_read = 1;
1712 info->filepos = filepos + 13;
1713 return return_val | BLOCK_FIRST;
1714 case 6:
1715 info->rec_len = mi_uint3korr(header + 1);
1716 info->block_len = info->data_len = mi_uint3korr(header + 4);
1717 info->next_filepos = mi_sizekorr(header + 7);
1718 info->second_read = 1;
1719 info->filepos = filepos + 15;
1720 return return_val | BLOCK_FIRST;
1721
1722 /* The following blocks are identical to 1-6 without rec_len */
1723 case 7:
1724 info->data_len = info->block_len = mi_uint2korr(header + 1);
1725 info->filepos = filepos + 3;
1726 return return_val | BLOCK_LAST;
1727 case 8:
1728 info->data_len = info->block_len = mi_uint3korr(header + 1);
1729 info->filepos = filepos + 4;
1730 return return_val | BLOCK_LAST;
1731
1732 case 9:
1733 info->data_len = mi_uint2korr(header + 1);
1734 info->block_len = info->data_len + (uint)header[3];
1735 info->filepos = filepos + 4;
1736 return return_val | BLOCK_LAST;
1737 case 10:
1738 info->data_len = mi_uint3korr(header + 1);
1739 info->block_len = info->data_len + (uint)header[4];
1740 info->filepos = filepos + 5;
1741 return return_val | BLOCK_LAST;
1742
1743 case 11:
1744 info->data_len = info->block_len = mi_uint2korr(header + 1);
1745 info->next_filepos = mi_sizekorr(header + 3);
1746 info->second_read = 1;
1747 info->filepos = filepos + 11;
1748 return return_val;
1749 case 12:
1750 info->data_len = info->block_len = mi_uint3korr(header + 1);
1751 info->next_filepos = mi_sizekorr(header + 4);
1752 info->second_read = 1;
1753 info->filepos = filepos + 12;
1754 return return_val;
1755 }
1756
1757 err:
1758 set_my_errno(HA_ERR_WRONG_IN_RECORD); /* Garbage */
1759 return BLOCK_ERROR;
1760 }
1761